home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Chip 2007 January, February, March & April
/
Chip-Cover-CD-2007-02.iso
/
Pakiet bezpieczenstwa
/
mini Pentoo LiveCD 2006.1
/
mpentoo-2006.1.iso
/
livecd.squashfs
/
usr
/
share
/
nikto
/
plugins
/
LW.pm
next >
Wrap
Text File
|
2005-10-19
|
145KB
|
10,308 lines
# libwhisker v1.7
# libwhisker is a collection of routines used by whisker
#
# libwhisker copyright 2000,2001,2002 rfp.labs
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
#
# More information can be found at http://www.wiretrip.net/rfp/
# Libwhisker mailing list and resources are also available at
# http://sourceforge.net/projects/whisker/
#
package LW;
use 5.004;
$LW::VERSION="1.7";
####### external module tests ###################################
BEGIN {
## LW module manager stuff ##
%LW::available = ();
$LW::LW_HAS_SOCKET = 0;
$LW::LW_HAS_SSL = 0;
$LW::LW_SSL_LIB = 0;
$LW::LW_NONBLOCK_CONNECT= 0;
## binary helper - may contain functions substituted further down ##
eval "use LW::bin"; # do we have libwhisker binary helpers?
if($@){ $LW::available{'LW::bin'}=$LW::bin::VERSION; }
## encode subpackage ##
eval "require MIME::Base64";
if($@){
*encode_base64 = \&encode_base64_perl;
*decode_base64 = \&decode_base64_perl;
} else{
# MIME::Base64 typically has faster C versions
$LW::available{'mime::base64'}=$MIME::Base64::VERSION;
*encode_base64 = \&MIME::Base64::encode_base64;
*decode_base64 = \&MIME::Base64::decode_base64;}
## md5 subpackage ##
eval "require MD5";
if(!$@){ $LW::available{'md5'}=$MD5::VERSION;}
## http subpackage ##
eval "use Socket"; # do we have socket support?
if($@){ $LW::LW_HAS_SOCKET=0; }
else { $LW::LW_HAS_SOCKET=1;
$LW::available{'socket'}=$Socket::VERSION;}
if($LW_HAS_SOCKET){
eval "use Net::SSLeay"; # do we have SSL support?
if($@){ $LW::LW_HAS_SSL=0; }
else { $LW::LW_HAS_SSL=1;
$LW::LW_SSL_LIB=1;
$LW::available{'net::ssleay'}=$Net::SSLeay::VERSION;
Net::SSLeay::load_error_strings();
Net::SSLeay::SSLeay_add_ssl_algorithms();
Net::SSLeay::randomize();}
if(!$LW::LW_HAS_SSL){
eval "use Net::SSL"; # different SSL lib
if($@){ $LW::LW_HAS_SSL=0; }
else { $LW::LW_HAS_SSL=1;
$LW::LW_SSL_LIB=2;
$LW::available{'net::ssl'}=$Net::SSL::VERSION;}
}
## non-blocking IO ##
if($^O!~/Win32/){
eval "use POSIX qw(:errno_h :fcntl_h)"; # better
if(!$@){
$LW::LW_NONBLOCK_CONNECT=1;
}
}
} # if($LW_HAS_SOCKET)
} # BEGIN
####### package variables #######################################
## crawl subpackage ##
%LW::crawl_config=( 'save_cookies' => 0,
'reuse_cookies' => 1,
'save_offsites' => 0,
'follow_moves' => 1,
'url_limit' => 1000,
'use_params' => 0,
'params_double_record' => 0,
'skip_ext' => '.gif .jpg .gz .mp3 .swf .zip ',
'save_skipped' => 0,
'save_referrers'=> 0,
'do_head' => 0,
'callback' => 0,
'slashdot_bug' => 1,
'normalize_uri' => 1,
'source_callback' => 0
);
@LW::crawl_urls=();;
%LW::crawl_server_tags=();
%LW::crawl_referrers=();
%LW::crawl_offsites=();
%LW::crawl_cookies=();
%LW::crawl_forms=();
%LW::crawl_temp=();
# this idea/structure was taken from HTML::LinkExtor.pm,
# copyright 2000 Gisle Aas and Michael A. Chase
%LW::crawl_linktags = (
'a' => 'href',
'applet' => [qw(codebase archive code)],
'area' => 'href',
'base' => 'href',
'bgsound' => 'src',
'blockquote' => 'cite',
'body' => 'background',
'del' => 'cite',
'embed' => [qw(src pluginspage)],
'form' => 'action',
'frame' => [qw(src longdesc)],
'iframe' => [qw(src longdesc)],
'ilayer' => 'background',
'img' => [qw(src lowsrc longdesc usemap)],
'input' => [qw(src usemap)],
'ins' => 'cite',
'isindex' => 'action',
'head' => 'profile',
'layer' => [qw(background src)],
'link' => 'href',
'object' => [qw(codebase data archive usemap)],
'q' => 'cite',
'script' => 'src',
'table' => 'background',
'td' => 'background',
'th' => 'background',
'xmp' => 'href',
);
## forms subpackage ##
@LW::forms_found=();
%LW::forms_current=();
## http subpackage ##
my $SOCKSTATE=0;
my $TIMEOUT=10; # default
my ($STATS_REQS,$STATS_SYNS)=(0,0);
my ($LAST_HOST,$LAST_INET_ATON,$LAST_SSL)=('','',0);
my ($OUTGOING_QUEUE,$INCOMING_QUEUE)=('','');
my ($SSL_CTX, $SSL_THINGY);
my %http_host_cache=();
# order is following:
# [0] - SOCKET
# [1] - $SOCKSTATE
# [2] - INET_ATON
# [3] - $SSL_CTX
# [4] - $SSL_THINGY
# [5] - $OUTGOING_QUEUE
# [6] - $INCOMING_QUEUE
# [7] - $STATS_SYNS
# [8] - $STATS_REQS
my $Z; # array ref to current host specs
=pod
=head1 ++ Sub package: anti-ids
The anti-ids sub package implements management routines for various
rewriting/encoding in order to evade intrusion detection systems.
=cut
########################################################################
=pod
=head1 - Function: LW::anti_ids
Params: \%hin, $modes
Return: nothing
LW::anti_ids computes the proper anti-ids encoding/tricks specified by
$modes, and sets up %hin in order to use those tricks. Valid modes
are (the mode numbers are the same as those found in whisker 1.4):
1 - Encode some of the characters via normal URL encoding
2 - Insert directory self-references (/./)
3 - Premature URL ending (make it appear the request line is done)
4 - Prepend a long random string in the form of "/string/../URL"
5 - Add a fake URL parameter
6 - Use a tab instead of a space as a request spacer
7 - Change the case of the URL around (works against Windows and Novell)
8 - Change normal seperators ('/') to Windows version ('\')
9 - Session splicing (sending data in multiple packets)
You can set multiple modes by setting the string to contain all the modes
desired; i.e. $modes="146" will use modes 1, 4, and 6.
=cut
sub anti_ids {
my ($rhin,$modes)=(shift,shift);
my (@T,$x,$c,$s,$y);
my $ENCODED=0;
my $W = $$rhin{'whisker'};
return if(!(defined $rhin && ref($rhin)));
# in case they didn't do it already
$$rhin{'whisker'}->{'uri_orig'}=$$rhin{'whisker'}->{'uri'};
# note: order is important!
# mode 9 - session splicing
if($modes=~/9/){
$$rhin{'whisker'}->{'ids_session_splice'}=1;
}
# mode 4 - prepend long random string
if($modes=~/4/){$s='';
if($$W{'uri'}=~m#^/#){
$y=&utils_randstr;
$s.=$y while(length($s)<512);
$$W{'uri'}="/$s/..".$$W{'uri'};
}
}
# mode 7 - (windows) random case sensitivity
if($modes=~/7/){
@T=split(//,$$W{'uri'});
for($x=0;$x<(scalar @T);$x++){
if((rand()*2)%2 == 1){
$T[$x]=uc($T[$x]);}}
$$W{'uri'}=join('',@T);
}
# mode 2 - directory self-reference (/./)
if($modes=~/2/){
$$W{'uri'}=~s#/#/./#g;
}
# mode 8 - windows directory separator (\)
if($modes=~/8/){
$$W{'uri'}=~s#/#\\#g;
$$W{'uri'}=~s#^\\#/#;
$$W{'uri'}=~s#^(http|file|ftp|nntp|news|telnet):\\#$1://#;
$$W{'uri'}=~s#\\$#/#;
}
# mode 1 - random URI (non-UTF8) encoding
if($modes=~/1/){
if($ENCODED==0){
$$W{'uri'}=encode_str2ruri($$W{'uri'});
$ENCODED=1;}
}
# mode 5 - fake parameter
if($modes=~/5/){
($s,$y)=(&utils_randstr,&utils_randstr);
$$W{'uri'}="/$s.html%3f$y=/../$$W{'uri'}";
}
# mode 3 - premature URL ending
if($modes=~/3/){
$s=&utils_randstr;
$$W{'uri'}="/%20HTTP/1.1%0D%0A%0D%0AAccept%3A%20$s/../..$$W{'uri'}";
}
# mode 6 - TAB as request spacer
if($modes=~/6/){
$$W{'req_spacer'}="\t";
}
} # end anti_ids
=pod
=head1 ++ Sub package: auth
The auth sub package implements HTTP authentication routines.
=cut
########################################################################
=pod
=head1 - Function: LW::auth_brute_force
Params: $auth_method, \%hin, $user, \@passwords [, $domain]
Return: $first_valid_password, undef if error/none found
Perform a HTTP authentication brute force against a server (host and URI
defined in %hin). It will try every password in the password array for
the given user. The first password (in conjunction with the given user)
that doesn't return HTTP 401 is returned (and the brute force is stopped
at that point). $domain is optional, and is only used for NTLM auth.
=cut
sub auth_brute_force {
my ($auth_method, $hrin, $user, $pwordref, $dom)=@_;
my ($P,%hout);
return undef if(!defined $auth_method || length($auth_method)==0);
return undef if(!defined $user || length($user) ==0);
return undef if(!(defined $hrin && ref($hrin) ));
return undef if(!(defined $pwordref && ref($pwordref)));
map {
($P=$_)=~tr/\r\n//d;
auth_set_header($auth_method,$hrin,$user,$P,$dom);
return undef if(http_do_request($hrin,\%hout));
return $P if($hout{'whisker'}->{'http_resp'} ne 401);
} @$pwordref;
return undef;}
########################################################################
=pod
=head1 - Function: LW::auth_set_header
Params: $auth_method, \%hin, $user, $password [, $domain]
Return: nothing (modifies %hin)
Set the appropriate authentication header in %hin.
NOTE: right now only BASIC and NTLM are supported.
=cut
sub auth_set_header {
my ($method, $href, $user, $pass, $domain)=(lc(shift),@_);
return if(!(defined $href && ref($href)));
return if(!defined $user || !defined $pass);
if($method eq 'basic'){
$$href{'Authorization'}='Basic '.encode_base64($user.':'.$pass,'');
}
if($method eq 'proxy-basic'){
$$href{'Proxy-Authorization'}='Basic '.encode_base64($user.':'.$pass,'');
}
if($method eq 'ntlm'){
my $o=ntlm_new($user,$pass,$domain);
$$href{'whisker'}->{'ntlm_obj'}=$o;
$$href{'whisker'}->{'ntlm_step'}=0;
$$href{'Authorization'}='NTLM '.ntlm_client($o);
}
}
########################################################################
=pod
=head1 - Function: LW::do_auth
Params: $auth_method, \%hin, $user, $password [, $domain]
Return: nothing (modifies %hin)
This is an alias for auth_set_header().
=cut
sub do_auth {
goto &auth_set_header;
}
=pod
=head1 ++ Sub package: bruteurl
The bruteurl sub package is used to perform a brute-force of HTTP
requests on an array of string components.
=cut
=pod
=head1 - Function: LW::bruteurl
Params: \%hin, $pre, $post, \@values_in, \@values_out
Return: Nothing (adds to @out)
Bruteurl will perform a brute force against the host/server specified in
%hin. However, it will make one request per entry in @in, taking the
value and setting $hin{'whisker'}->{'uri'}= $pre.value.$post. Any URI
responding with an HTTP 200 or 403 response is pushed into @out. An
example of this would be to brute force usernames, putting a list of
common usernames in @in, setting $pre='/~' and $post='/'.
=cut
sub bruteurl {
my ($hin, $upre, $upost, $arin, $arout)=@_;
my ($U,%hout);
return if(!(defined $hin && ref($hin) ));
return if(!(defined $arin && ref($arin) ));
return if(!(defined $arout && ref($arout)));
return if(!defined $upre || length($upre) ==0);
return if(!defined $upost || length($upost)==0);
http_fixup_request($hin);
map {
($U=$_)=~tr/\r\n//d; next if($U eq '');
if(!http_do_request($hin,\%hout,{'uri'=>$upre.$U.$upost})){
if( $hout{'whisker'}->{'http_resp'}==200 ||
$hout{'whisker'}->{'http_resp'}==403){
push(@{$arout},$U);
}
}
} @$arin;
}
=pod
=head1 ++ Sub package: cookie
Cookie handling functions.
Cookies are stored in a "jar" (hash), indexed by cookie name. The
contents are an anonymous array:
$jar{'name'}=@( 'value', 'domain', 'path', 'expire', 'secure' )
=cut
########################################################################
=pod
=head1 - Function: LW::cookie_read
Params: \%jar, \%hout
Return: $num_of_cookies_read
Read in cookies from an %hout hash (HTTP response), and put them in %jar.
=cut
sub cookie_read {
my ($count,$jarref,$href)=(0,@_);
return 0 if(!(defined $jarref && ref($jarref)));
return 0 if(!(defined $href && ref($href) ));
my $target = utils_find_lowercase_key($href,'set-cookie');
if(!defined $target){
return 0;}
if(ref($target)){ # multiple headers
foreach (@{$target}){
cookie_parse($jarref,$_);
$count++; }
} else { # single header
cookie_parse($jarref,$target);
$count=1; }
return $count;
}
########################################################################
=pod
=head1 - Function: LW::cookie_parse
Params: \%jar, $cookie
Return: nothing
Parses the cookie into the various parts and then sets the appropriate
values in the %jar under the name; if the cookie is blank, it will delete
it from the jar.
=cut
sub cookie_parse {
my ($jarref, $header)=@_;
my ($del,$part,@parts,@construct,$cookie_name)=(0);
return if(!(defined $jarref && ref($jarref)));
return if(!(defined $header && length($header)>0));
@parts=split(/;/,$header);
foreach $part (@parts){
if($part=~/^[ \t]*(.+?)=(.*)$/){
my ($name,$val)=($1,$2);
if($name=~/^domain$/i){
$val=~s#^http://##;
$val=~s#/.*$##;
$construct[1]=$val;
} elsif($name=~/^path$/i){
$val=~s#/$## if($val ne '/');
$construct[2]=$val;
} elsif($name=~/^expires$/i){
$construct[3]=$val;
} else {
$cookie_name=$name;
if($val eq ''){ $del=1;
} else { $construct[0]=$val;}
}
} else {
if($part=~/secure/){
$construct[4]=1;}
} }
if($del){
delete $$jarref{$cookie_name} if defined $$jarref{$cookie_name};
} else {
$$jarref{$cookie_name}=\@construct;
}
}
########################################################################
=pod
=head1 - Function: LW::cookie_write
Params: \%jar, \%hin, $override
Return: nothing
Goes through the given jar and sets the Cookie header in %hin pending the
correct domain and path. If $override is true, then the domain and path
restrictions of the cookies are ignored.
Todo: factor in expire and secure.
=cut
sub cookie_write {
my ($jarref, $hin, $override)=@_;
my ($name,$out)=('','');
return if(!(defined $jarref && ref($jarref)));
return if(!(defined $hin && ref($hin) ));
$override=$override||0;
$$hin{'whisker'}->{'ssl'}=$$hin{'whisker'}->{'ssl'}||0;
foreach $name (keys %$jarref){
next if($name eq '');
next if($$hin{'whisker'}->{'ssl'}==0 && $$jarref{$name}->[4]>0);
if($override ||
($$hin{'whisker'}->{'host'}=~/$$jarref{$name}->[1]$/i &&
$$hin{'whisker'}->{'uri'}=~/$$jarref{$name}->[2]/i)){
$out.="$name=$$jarref{$name}->[0];";
} }
if($out ne ''){ $$hin{'Cookie'}=$out; }
}
########################################################################
=pod
=head1 - Function: LW::cookie_get
Params: \%jar, $name
Return: @elements
Fetch the named cookie from the jar, and return the components.
=cut
sub cookie_get {
my ($jarref,$name)=@_;
return undef if(!(defined $jarref && ref($jarref)));
if(defined $$jarref{$name}){
return @{$$jarref{$name}};}
return undef;
}
########################################################################
=pod
=head1 - Function: LW::cookie_set
Params: \%jar, $name, $value, $domain, $path, $expire, $secure
Return: nothing
Set the named cookie with the provided values into the %jar.
=cut
sub cookie_set {
my ($jarref,$name,$value,$domain,$path,$expire,$secure)=@_;
my @construct;
return if(!(defined $jarref && ref($jarref)));
return if($name eq '');
if($value eq ''){
delete $$jarref{$name};
return;}
$path=$path||'/';
$secure=$secure||0;
@construct=($value,$domain,$path,$expire,$secure);
$$jarref{$name}=\@construct;
}
########################################################################
=pod
=head1 ++ Sub package: crawl
Used for crawling a website by requesting a (start) page, reading the
HTML, extracting the links, and then requesting those links--up to a
specified depth. The module also allows various configuration tweaks to
do such things as monitor requests for offsite URLs (pages on other
hosts), track various cookies, etc.
=cut
#####################################################
=pod
=head1 - Function: LW::crawl
Params: $START, $MAX_DEPTH, \%tracking, \%hin
Return: Nothing
The heart of the crawl package. Will perform an HTTP crawl on the
specified HOST, starting at START URI, proceeding up to MAX_DEPTH. A
tracking hash reference (required) stores the results of each page (and
ongoing progress). The http_in_options hash reference specifies a
standard HTTP hash for use in the outgoing HTTP requests. Certain options
are configurable via LW::crawl_set_config(). The tracking hash will
contain all the pages visited; you can get the crawl engine to skip pages
by placing them in the tracking hash ahead of time.
START (first) parameter should be of the form "http://www.host.com/url".
=cut
sub crawl {
my ($START, $MAX_DEPTH, $hrtrack, $hrin)=@_;
my (%hout, %jar);
my ($T, @ST, @links, @tlinks, @vals, @ERRORS)=('');
return if(!(defined $hrtrack && ref($hrtrack)));
return if(!(defined $hrin && ref($hrin) ));
return if(!defined $START || length($START)==0);
$MAX_DEPTH||=2;
# $ST[0]=HOST $ST[1]=URL $ST[2]=CWD $ST[3]=HTTPS $ST[4]=SERVER
# $ST[5]=PORT $ST[6]=DEPTH
@vals=utils_split_uri($START);
$ST[1]=$vals[0]; # uri
$ST[0]=$vals[2]; # host
$ST[5]=$vals[3]; # port
$ST[4]=undef; # server tag
return if($ST[0] eq '');
# some various informationz...
$LW::crawl_config{'host'}=$ST[0];
$LW::crawl_config{'port'}=$ST[5];
$LW::crawl_config{'start'}=$ST[1];
$$hrin{'whisker'}->{'host'}=$ST[0];
$$hrin{'whisker'}->{'port'}=$ST[5];
$$hrin{'whisker'}->{'lowercase_incoming_headers'}=1; # makes life easier
http_fixup_request($hrin);
# this is so callbacks can access internals via references
$LW::crawl_config{'ref_links'}=\@links;
$LW::crawl_config{'ref_jar'}=\%jar;
$LW::crawl_config{'ref_hin'}=$hrin;
$LW::crawl_config{'ref_hout'}=\%hout;
%LW::crawl_referrers=(); # empty out existing referrers
%LW::crawl_server_tags=();
%LW::crawl_offsites=();
%LW::crawl_cookies=();
%LW::crawl_forms=();
push @links, \@{[$ST[1],1,($vals[1] eq 'https')?1:0]};
while(@links){
my $C=shift @links;
$ST[1]=$C->[0]; # url
$ST[6]=$C->[1]; # depth
$ST[3]=$C->[2]; # https
next if(defined $$hrtrack{$ST[1]} && $$hrtrack{$ST[1]} ne '?');
if($ST[6] > $MAX_DEPTH){
$$hrtrack{$ST[1]}='?' if($LW::crawl_config{'save_skipped'}>0);
next;
}
$ST[2]=utils_get_dir($ST[1]);
$$hrin{'whisker'}->{'uri'}=$ST[1];
$$hrin{'whisker'}->{'ssl'}=$ST[3];
my $result = crawl_do_request($hrin,\%hout);
if($result==1 || $result==2){
push @ERRORS, "Error on making request for '$ST[1]': $hout{'whisker'}->{'error'}";
next;
}
if($result==0 || $result==4){
$$hrtrack{$ST[1]}=$hout{'whisker'}->{'http_resp'}; }
if($result==3 || $result==5){
$$hrtrack{$ST[1]}='?' if($LW::crawl_config{'save_skipped'}>0); }
if(defined $hout{'server'}){
if(!defined $ST[4]){ # server tag
$ST[4]=$hout{'server'}; }
$LW::crawl_server_tags{$hout{'server'}}++;
}
if(defined $hout{'set-cookie'}){
if($LW::crawl_config{'save_cookies'}>0){
if(ref($hout{'set-cookie'})){
foreach (@{$hout{'set-cookie'}}){
$LW::crawl_cookies{$_}++; }
} else {
$LW::crawl_cookies{$hout{'set-cookie'}}++;
} }
if($LW::crawl_config{'reuse_cookies'}>0){
cookie_read(\%jar,\%hout); }
}
next if($result==4 || $result==5);
next if(scalar @links > $LW::crawl_config{'url_limit'});
if($result==0){ # page should be parsed
if($LW::crawl_config{'source_callback'} != 0 &&
ref($LW::crawl_config{'source_callback'})){
&{$LW::crawl_config{'source_callback'}}($hrin,\%hout); }
LW::html_find_tags(\$hout{'whisker'}->{'data'},
\&crawl_extract_links_test);
$LW::crawl_config{'stats_html'}++; # count how many pages we've parsed
}
if($result==3){ # follow the move via location header
push @LW::crawl_urls, $hout{'location'}; }
foreach $T (@LW::crawl_urls){
$T=~tr/\0\r\n//d; # the NULL character is a bug that's somewhere
next if (length($T)==0);
next if ($T=~/^javascript:/i); # stupid javascript
next if ($T=~/^mailto:/i);
next if ($T=~m#^([a-zA-Z]*)://# && lc($1) ne 'http' && lc($1) ne 'https');
next if ($T=~/^#/i); # fragment
if($LW::crawl_config{'callback'} != 0){
next if &{$LW::crawl_config{'callback'}}($T,@ST); }
push(@{$LW::crawl_referrers{$T}}, $ST[1])
if( $LW::crawl_config{'save_referrers'}>0 );
$T=utils_absolute_uri($T,$ST[1],1) if($LW::crawl_config{'normalize_uri'}>0);
@vals=utils_split_uri($T);
# slashdot bug: workaround for the following fsck'd html code:
# <FORM ACTION="//slashdot.org/users.pl" METHOD="GET">
if($LW::crawl_config{'slashdot_bug'} > 0 &&
substr($vals[0],0,2) eq '//'){
if($ST[3]==1){ $T='https:'.$T;
} else { $T='http:' .$T; }
@vals=utils_split_uri($T);
}
# make sure URL is on same host, port, and protocol
if( (defined $vals[2] && $vals[2] ne $ST[0]) ||
(defined $vals[3] && $vals[3] != $ST[5]) ||
(defined $vals[1] && ($vals[1] ne 'http'
&& $vals[1] ne 'https'))){
if($LW::crawl_config{'save_offsites'}>0){
$LW::crawl_offsites{utils_join_uri(@vals)}++; }
next; }
if(substr($vals[0],0,1) ne '/'){
$vals[0]=$ST[2].$vals[0]; }
my $where=rindex($vals[0],'.');
my $EXT='';
if($where >= 0){
$EXT = substr($vals[0], $where+1, length($vals[0])-$where); }
$EXT=~tr/0-9a-zA-Z//cd; # yucky chars will puke regex below
if($EXT ne '' && $LW::crawl_config{'skip_ext'}=~/\.$EXT /i){
if($LW::crawl_config{'save_skipped'}>0){
$$hrtrack{$vals[0]}='?'; }
next; }
if(defined $vals[4] && $LW::crawl_config{'use_params'}>0){
if($LW::crawl_config{'params_double_record'}>0 &&
!defined $$hrtrack{$vals[0]}){
$$hrtrack{$vals[0]}='?'; }
$vals[0]=$vals[0].'?'.$vals[4];
}
next if(defined $$hrtrack{$vals[0]});
push @links, \@{[$vals[0],$ST[6]+1, ($vals[1] eq 'https')?1:0]};
} # foreach
@LW::crawl_urls=(); # reset for next round
} # while
my $key;
foreach $key (keys %LW::crawl_config){
delete $LW::crawl_config{$key} if (substr($key,0,4) eq 'ref_');}
$LW::crawl_config{'stats_reqs'}=$hout{'whisker'}->{'stats_reqs'};
$LW::crawl_config{'stats_syns'}=$hout{'whisker'}->{'stats_syns'};
} # end sub crawl
#####################################################
=pod
=head1 - Function: LW::crawl_get_config
Params: $config_directive
Return: $config_directive_value
Returns the set value of the submitted config_directive. See
LW::crawl_set_config() for a list of configuration values.
=cut
sub crawl_get_config {
my $key=shift;
return $LW::crawl_config{$key};
}
#####################################################
=pod
=head1 - Function: LW::crawl_set_config
Params: $config_directive, $value
Return: Nothing
This function adjusts the configuration of the crawl package. Use values
of 0 and 1 for off and on, respectively. The defaults are set in
libs/globals.wpl.
save_cookies
- crawl will save all cookies encountered, for later review
save_offsite_urls
- crawl will save all offsite URLs (URLs not on this host); crawl
will not actually crawl those hosts (use separate calls to crawl)
follow_moves
- crawl will follow the URL received from an HTTP move response
use_params
- crawl will factor in URI parameters when considering if a URI is unique
or not
params_double_record
- if both use_params and params_double_record are set, crawl will make two
entries for each URI which has paramaters: one with and one without the
parameters
reuse_cookies
- crawl will resubmit any received/prior cookies
skip_ext
- crawl will ignore requests for URLs ending in extensions given; the
value requires a specific string format: (dot)extension(space). For
example, to ignore GIFs and JPGs, you would run:
LW::crawl_set_config('skip_ext',".gif .jpg ");
save_skipped
- any URLs that are skipped via skip_ext, or are above the specified DEPTH
will be recorded in the tracking hash with a value of '?' (instead of an
HTTP response code).
callback
- crawl will call this function (if this is a reference to a function),
passing it the current URI and the @ST array (which has host, port, SSL,
etc info). If the function returns a TRUE value, then crawl will skip
that URI. Set to value 0 (zero) if you do not want to use a callback.
slashdot_bug
- slashdot.org uses a screwy piece of invalid (yet it works) HTML in
the form of <FORM ACTION="//slashdot.org/somefile">. So basically,
when a URL starts with '//' and slashdot_bug is set to 1 (which it
is by default), then the proper 'http:' or 'https:' will be prepended
to the URL.
source_callback
- crawl will call this function (if this is a reference to a function),
passing references to %hin and %hout, right before it parses the page
for HTML links. This allows the callback function to review or
modify the HTML before it's parsed for links. Return value is ignored.
url_limit
- number or URLs that crawl will queue up at one time; defaults to 1000
do_head
- use head requests to determine if a file has a content-type worth
downloading. Potentially saves some time, assuming the server properly
supports HEAD requests. Set to value 1 to use (0/off by default).
=cut
sub crawl_set_config {
return if(!defined $_[0]);
my %opts=@_;
while( my($k,$v)=each %opts){
$LW::crawl_config{lc($k)}=$v; }
}
#####################################################
=pod
=head1 - Function: LW::crawl_extract_links_test (INTERNAL)
Params: $TAG, \%elements, \$html_data, $offset, $len
Return: nothing
This is the callback function used by the crawl function, and passed to
html_find_tags. It will find URL/URI links and place them in
@LW::crawl_urls.
=cut
sub crawl_extract_links_test {
my ($TAG, $hr, $dr, $start, $len)=(lc(shift),@_);
my $t;
# this should be most of the time...
return undef if(!defined ($t=$LW::crawl_linktags{$TAG}));
return undef if(!scalar %$hr); # fastpath quickie
while( my ($key,$val)= each %$hr){ # normalize element values
$$hr{lc($key)} = $val;
}
if(ref($t)){
foreach (@$t){
push(@LW::crawl_urls,$$hr{$_}) if(defined $$hr{$_});
}
} else {
push(@LW::crawl_urls,$$hr{$t}) if(defined $$hr{$t});
}
if($TAG eq 'form' && defined $$hr{action}){
my $u=$LW::crawl_config{'ref_hout'}->{'whisker'}->{'uri'};
$LW::crawl_forms{utils_absolute_uri($$hr{action},$u,1)}++;
}
return undef;
}
################################################################
=pod
=head1 - Function: LW::crawl_make_request (INTERNAL)
Params: \%hin, \%hout
Return: $status_code
This is an internal function used by LW::crawl(), and is responsible for
making HTTP requests, including any HEAD pre-requests and following move
responses. Status codes are:
0 Success
1 Error during request
2 Error on connection setup
3 Move request; follow Location header
4 File not of text/htm(l) type
5 File not available
=cut
sub crawl_do_request {
my ($hrin,$hrout) = @_;
my $ret;
if($LW::crawl_config{'do_head'}){
my $save=$$hrin{'whisker'}->{'method'};
$$hrin{'whisker'}->{'method'}='HEAD';
$ret=http_do_request($hrin,$hrout);
$$hrin{'whisker'}->{'method'}=$save;
return 2 if($ret==2); # if there was connection error, do not continue
if($ret==0){ # successful request
if($$hrout{'whisker'}->{'http_resp'}==501){ # HEAD not allowed
$LW::crawl_config{'do_head'}=0; # no more HEAD requests
}
if($$hrout{'whisker'}->{'http_resp'} <308 &&
$$hrout{'whisker'}->{'http_resp'} >300){
if($LW::crawl_config{'follow_moves'} >0){
return 3 if(defined $$hrout{'location'}); }
return 5; # not avail
}
if($$hrout{'whisker'}->{'http_resp'}==200){
# no content-type is treated as text/htm
if(defined $$hrout{'content-type'} &&
$$hrout{'content-type'}!~/^text\/htm/i){
return 4;
}
# fall through to GET request below
}
}
# request errors are essentially redone via GET, below
}
return http_do_request($hrin,$hrout);
}
#####################################################
=pod
=head1 ++ Sub package: dump
The dump subpackage contains various utility functions which emulate
the basic functionality provided by Data::Dumper.
=cut
########################################################################
=pod
=head1 - Function: LW::dumper
Params: $name, \@array [, $name, \%hash, $name, \$scalar ]
Return: $code, undef on error
The dumper function will take the given $name and data reference, and
will create an ASCII perl code representation suitable for eval'ing
later to recreate the same structure. $name is the name of the variable
that it will be saved as. Example:
$output = LW::dumper('hin',\%hin);
NOTE: dumper() creates anonymous structures under the name given. For
example, if you dump the hash %hin under the name 'hin', then when you
eval the dumped code you will need to use %$hin, since $hin is now a
*reference* to a hash.
=cut
sub dumper {
my %what=@_;
my ($final,$k,$v)=('');
while( ($k,$v)=each %what){
return undef if(ref($k) || !ref($v));
$final.="\$$k = "._dump(1,$v,1);
$final=~s#,\n$##;
$final.=";\n"; }
return $final;
}
########################################################################
=pod
=head1 - Function: LW::dumper_writefile
Params: $file, $name, \@array [, $name, \%hash, $name, \@scalar ]
Return: 0 if success; 1 if error
This calls dumper() and saves the output to the specified $file.
Note: LW does not checking on the validity of the file name, it's
creation, or anything of the sort. Files are opened in overwrite
mode.
=cut
sub dumper_writefile {
my $file=shift;
my $output=dumper(@_);
return 1 if(!open(OUT,">$file") || $output eq 'ERROR');
print OUT $output;
close(OUT);
}
########################################################################
=pod
=head1 - Function: LW::_dump (INTERNAL)
Params: $tabs, $ref
Return: $output
This is an internal function to dumper() which will dereference all
elements and produce the resulting code.
This function is not intended for external use.
=cut
sub _dump { # dereference and dump an element
my ($t, $ref, $depth)=@_;
my ($out,$k,$v)=('');
$depth||=1;
# to protect against circular loops
return 'undef' if($depth > 128);
if(!defined $ref){
return 'undef';
} elsif(ref($ref) eq 'HASH'){
$out.="{\n";
while( ($k,$v)=each %$ref){
next if($k eq '');
$out.= "\t"x$t;
$out.=_dumpd($k).' => ';
if(ref($v)){ $out.=_dump($t+1,$v,$depth+1); }
else { $out.=_dumpd($v); }
$out.=",\n" unless( substr($out,-2,2) eq ",\n");
}
$out=~s#,\n$#\n#;
$out.="\t"x($t-1);
$out.="},\n";
} elsif(ref($ref) eq 'ARRAY'){
$out.="[";
if(~~@$ref){
$out.="\n";
foreach $v (@$ref) {
$out.= "\t"x$t;
if(ref($v)){ $out.=_dump($t+1,$v,$depth+1); }
else { $out.=_dumpd($v); }
$out.=",\n" unless( substr($out,-2,2) eq ",\n");
}
$out=~s#,\n$#\n#;
$out.="\t"x($t-1);
}
$out.="],\n";
} elsif(ref($ref) eq 'SCALAR'){
$out.=_dumpd($$ref);
} elsif(ref($ref) eq 'REF'){
$out.=_dump($t,$$ref,$depth+1);
} elsif(ref($ref)){ # unknown/unsupported ref
$out.="undef";
} else { # normal scalar
$out.=_dumpd($ref);
}
return $out;
}
########################################################################
=pod
=head1 - Function: LW::_dumpd (INTERNAL)
Params: $string
Return: $escaped_string
This is an internal function to dumper() which will escape the given
string to make it suitable for printing.
This function is not intended for external use.
=cut
sub _dumpd { # escape a scalar string
my $v=shift;
return 'undef' if(!defined $v);
return "''" if($v eq '');
return "$v" if($v!~tr/0-9//c);
return "'$v'" if($v!~tr/ !-~//c);
$v=~s#\\#\\\\#g; $v=~s#"#\\"#g;
$v=~s#\r#\\r#g; $v=~s#\n#\\n#g;
$v=~s#\0#\\0#g; $v=~s#\t#\\t#g;
$v=~s#([^!-~ ])#sprintf('\\x%02x',ord($1))#eg;
return "\"$v\"";
}
########################################################################
=pod
=head1 ++ Sub package: easy
The 'easy' subpackage contains many high-level/simple functions to
do basic web tasks. This should make it easier to use libwhisker
to do basic tasks.
=cut
########################################################################
=pod
=head1 - Function: LW::get_page
Params: $url [, \%hin_request]
Return: $code, $data ($code will be set to undef on error, $data will
contain error message)
This function will fetch the page at the given URL, and return the HTTP response code
and page contents. Use this in the form of:
($code,$html)=LW::get_page("http://host.com/page.html")
The optional %hin_request will be used if supplied. This allows you to set
headers and other parameters.
=cut
sub get_page {
my ($URL,$hr)=(shift,shift);
return (undef,"No URL supplied") if(length($URL)==0);
my (%req,%resp);
my $rptr;
if(defined $hr && ref($hr)){
$rptr=$hr;
} else {
$rptr=\%req;
LW::http_init_request(\%req);
}
LW::utils_split_uri($URL,$rptr); # this is newer >=1.1 syntax
LW::http_fixup_request($rptr);
if(http_do_request($rptr,\%resp)){
return (undef,$resp{'whisker'}->{'error'});
}
return ($resp{'whisker'}->{'code'}, $resp{'whisker'}->{'data'});
}
########################################################################
=pod
=head1 - Function: LW::get_page_hash
Params: $url [, \%hin_request]
Return: $hash_ref (undef on no URL)
This function will fetch the page at the given URL, and return the whisker
HTTP response hash. The return code of the function is set to
$hash_ref->{whisker}->{get_page_hash}, and uses the LW::http_do_request()
response values.
Note: undef is returned if no URL is supplied
=cut
sub get_page_hash {
my ($URL,$hr)=(shift,shift);
return undef if(length($URL)==0);
my (%req,%resp);
my $rptr;
if(defined $hr && ref($hr)){
$rptr=$hr;
} else {
$rptr=\%req;
LW::http_init_request(\%req);
}
LW::utils_split_uri($URL,$rptr); # this is newer >=1.1 syntax
LW::http_fixup_request($rptr);
my $r=http_do_request($rptr,\%resp);
$resp{whisker}->{get_page_hash}=$r;
return \%resp;
}
########################################################################
=pod
=head1 - Function: LW::get_page_to_file
Params: $url, $filepath [, \%hin_request]
Return: $code ($code will be set to undef on error)
This function will fetch the page at the given URL, place the resulting HTML
in the file specified, and return the HTTP response code. The optional
%hin_request hash sets the default parameters to be used in the request.
NOTE: libwhisker does not do any file checking; libwhisker will open the
supplied filepath for writing, overwriting any previously-existing files.
Libwhisker does not differentiate between a bad request, and a bad file
open. If you're having troubles making this function work, make sure
that your $filepath is legal and valid, and that you have appropriate
write permissions to create/overwrite that file.
=cut
sub get_page_to_file {
my ($URL, $filepath, $hr)=@_;
return undef if(length($URL)==0);
return undef if(length($filepath)==0);
my (%req,%resp);
my $rptr;
if(defined $hr && ref($hr)){
$rptr=$hr;
} else {
$rptr=\%req;
LW::http_init_request(\%req);
}
LW::utils_split_uri($URL,$rptr); # this is newer >=1.1 syntax
LW::http_fixup_request($rptr);
if(http_do_request($rptr,\%resp)){
return undef;
}
open(OUT,">$filepath") || return undef;
binmode(OUT); # stupid Windows
print OUT $resp{'whisker'}->{'data'};
close(OUT);
return $resp{'whisker'}->{'code'};
}
########################################################################
=pod
=head1 - Function: LW::upload_file
Params: $url, $filepath, $paramname [, \%hin_request]
Return: $code ($code will be set to undef on error)
This function will upload the specified $file to the given $url as
the parameter named $paramname via a multipart POST request. The
optional $hin_request hash lets you set any other particular request
parameters.
NOTE: this is a highly simplied function for basic uploads. If you
need to do more advanced things like set other multipart form
parameters, send multiple files, etc, then you will need to use the
normal API to do it yourself.
=cut
sub upload_file {
my ($URL, $filepath, $paramname, $hr)=@_;
return undef if(length($URL) ==0);
return undef if(length($filepath) ==0);
return undef if(length($paramname)==0);
return undef if(!(-e $filepath && -f $filepath));
my (%req,%resp,%multi);
my $rptr;
if(defined $hr && ref($hr)){
$rptr=$hr;
} else {
$rptr=\%req;
LW::http_init_request(\%req);
}
LW::utils_split_uri($URL,$rptr); # this is newer >=1.1 syntax
$rptr{'whisker'}->{'method'}='POST';
LW::http_fixup_request($rptr);
LW::multipart_setfile(\%multi,$filepath,$paramname);
LW::multipart_write(\%multi,$rptr);
if(http_do_request($rptr,\%resp)){
return undef;
}
return $resp{'whisker'}->{'code'};
}
########################################################################
=pod
=head1 - Function: LW::download_file
Params: $url, $filepath [, \%hin_request]
Return: $code ($code will be set to undef on error)
LW::download_file is just an alias for LW::get_page_to_file().
=cut
sub download_file {
goto &LW::get_page_to_file;
}
########################################################################
=pod
=head1 ++ Sub package: encode
Various type encodings. Installing MIME::Base64 will result in a
compiled C version of base64 functions, which means they will be tons
faster. This is useful if you're going to run a Basic authentication
brute force, which requires a high processing speed. However, it's not
required, since I include a Perl version, which is slower.
=cut
########################################################################
=pod
=head1 - Function: LW::encode_base64
Params: $data, $eol
Return: $base64_encoded_data
LW::encode_base64 is a stub function which will choose the fastest
function available for doing base64 encoding. This is done by checking to
see if the MIME::Base64 perl module is available (which uses fast C
routines). If it's not, then it defaults to a perl version (which is
slower). You can call the perl version direct, but I suggest using the
stub to gain speed advantages where possible.
=cut
#sub encode_base64;
########################################################################
=pod
=head1 - Function: LW::decode_base64
Params: $data
Return: $base64_decoded_data
LW::decode_base64 is a stub function which will choose the fastest
function available for doing base64 decoding. This is done by checking to
see if the MIME::Base64 perl module is available (which uses fast C
routines). If it's not, then it defaults to a perl version (which is
slower). You can call the perl version direct, but I suggest using the
stub to gain speed advantages where possible.
=cut
#sub decode_base64;
########################################################################
=pod
=head1 - Function: LW::encode_base64_perl
Params: $data, $eol
Return: $b64_encoded_data
A perl implementation of base64 encoding. I recommend you use
LW::encode_base64 instead, since it may use the MIME::Base64 module (if
available), which lead to speed advantages. The perl code for this
function was actually taken from an older MIME::Base64 perl module, and
bears the following copyright:
Copyright 1995-1999 Gisle Aas <gisle@aas.no>
NOTE: the $eol parameter will be inserted every 76 characters. This is
used to format the data for output on a 80 character wide terminal.
=cut
sub encode_base64_perl { # ripped from MIME::Base64
my $res = "";
my $eol = $_[1];
$eol = "\n" unless defined $eol;
pos($_[0]) = 0;
while ($_[0] =~ /(.{1,45})/gs) {
$res .= substr(pack('u', $1), 1);
chop($res);}
$res =~ tr|` -_|AA-Za-z0-9+/|;
my $padding = (3 - length($_[0]) % 3) % 3;
$res =~ s/.{$padding}$/'=' x $padding/e if $padding;
if (length $eol) {
$res =~ s/(.{1,76})/$1$eol/g;
} $res; }
########################################################################
=pod
=head1 - Function: LW::decode_base64_perl
Params: $data
Return: $b64_decoded_data
A perl implementation of base64 decoding. The perl code for this function
was actually taken from an older MIME::Base64 perl module, and bears the
following copyright:
Copyright 1995-1999 Gisle Aas <gisle@aas.no>
=cut
sub decode_base64_perl { # ripped from MIME::Base64
my $str = shift;
my $res = "";
$str =~ tr|A-Za-z0-9+=/||cd;
$str =~ s/=+$//; # remove padding
$str =~ tr|A-Za-z0-9+/| -_|; # convert to uuencoded format
while ($str =~ /(.{1,60})/gs) {
my $len = chr(32 + length($1)*3/4); # compute length byte
$res .= unpack("u", $len . $1 ); # uudecode
}$res;}
########################################################################
=pod
=head1 - Function: LW::encode_str2uri
Params: $data
Return: $result
This function encodes every character (except the / character) with normal
URL hex encoding.
=cut
sub encode_str2uri { # normal hex encoding
my $str=shift;
$str=~s/([^\/])/sprintf("%%%02x",ord($1))/ge;
return $str;}
#########################################################################
=pod
=head1 - Function: LW::encode_str2ruri
Params: $data
Return: $result
This function randomly encodes characters (except the / character) with
normal URL hex encoding.
=cut
sub encode_str2ruri { # random normal hex encoding
my @T=split(//,shift);
my $s;
foreach (@T) {
if(m#;=:&@\?#){
$s.=$_;
next;
}
if((rand()*2)%2 == 1){ $s.=sprintf("%%%02x",ord($_)) ;
}else{ $s.=$_; }
}
return $s;
}
#########################################################################
=pod
=head1 - Function: LW::encode_unicode
Params: $data
Return: $result
This function converts a normal string into Windows unicode format.
=cut
sub encode_unicode
{
my $r='';
foreach $c (split(//,shift)){
$r.=pack("v",ord($c));
}
return $r;
}
#########################################################################
=pod
=head1 ++ Sub package: forms
This subpackage contains various routines to parse and handle HTML forms.
The goal is to parse the variable, human-readable HTML into concrete
structures useable by your program. The forms package does do a good job
at making these structures, but I will admit: they are not exactly simple,
and thus not a cinch to work with. But then again, representing something
as complex as a HTML form is not a simple thing either. I think the
results are acceptable for what's trying to be done. Anyways...
Forms are stored in perl hashes, with elements in the following format:
$form{'element_name'}=@([ 'type', 'value', @params ])
Thus every element in the hash is an array of anonymous arrays. The first
array value contains the element type (which is 'select', 'textarea',
'button', or an 'input' value of the form 'input-text', 'input-hidden',
'input-radio', etc).
The second value is the value, if applicable (it could be undef if no
value was specified). Note that select elements will always have an undef
value--the actual values are in the subsequent options elements.
The third value, if defined, is an anonymous array of additional tag
parameters found in the element (like 'onchange="blah"', 'size="20"',
'maxlength="40"', 'selected', etc).
The array does contain one special element, which is stored in the hash
under a NULL character ("\0") key. This element is of the format:
$form{"\0"}=['name', 'method', 'action', @parameters];
The element is an anonymous array that contains strings of the form's
name, method, and action (values can be undef), and a @parameters array
similar to that found in normal elements (above).
Accessing individual values stored in the form hash becomes a test of your
perl referencing skills. Hint: to access the 'value' of the third element
named 'choices', you would need to do:
$form{'choices'}->[2]->[1];
The '[2]' is the third element (normal array starts with 0), and the
actual value is '[1]' (the type is '[0]', and the parameter array is
'[2]').
=cut
################################################################
=pod
=head1 - Function: LW::forms_read
Params: \$html_data
Return: @found_forms
This function parses the given $html_data into libwhisker form hashes.
It returns an array of hash references to the found forms.
=cut
sub forms_read {
my $dr=shift;
return undef if(!ref($dr) || length($$dr)==0);
@LW::forms_found=();
LW::html_find_tags($dr,\&forms_parse_callback);
if(scalar %LW::forms_current){
my %DUP=%LW::forms_current;
push(@LW::forms_found,\%DUP);
}
return @LW::forms_found;
}
################################################################
=pod
=head1 - Function: LW::forms_write
Params: \%form_hash
Return: $html_of_form [undef on error]
This function will take the given %form hash and compose a generic HTML
representation of it, formatted with tabs and newlines in order to make it
neat and tidy for printing.
Note: this function does *not* escape any special characters that were
embedded in the element values.
=cut
sub forms_write {
my $hr=shift;
return undef if(!ref($hr) || !(scalar %$hr));
return undef if(!defined $$hr{"\0"});
my $t='<form name="'.$$hr{"\0"}->[0].'" method="';
$t.=$$hr{"\0"}->[1].'" action="'.$$hr{"\0"}->[2].'"';
if(defined $$hr{"\0"}->[3]){
$t.=' '.join(' ',@{$$hr{"\0"}->[3]}); }
$t.=">\n";
while( my($name,$ar)=each(%$hr) ){
next if($name eq "\0");
foreach $a (@$ar){
my $P='';
$P=' '.join(' ', @{$$a[2]}) if(defined $$a[2]);
$t.="\t";
if($$a[0] eq 'textarea'){
$t.="<textarea name=\"$name\"$P>$$a[1]";
$t.="</textarea>\n";
} elsif($$a[0]=~m/^input-(.+)$/){
$t.="<input type=\"$1\" name=\"$name\" ";
$t.="value=\"$$a[1]\"$P>\n";
} elsif($$a[0] eq 'option'){
$t.="\t<option value=\"$$a[1]\"$P>$$a[1]\n";
} elsif($$a[0] eq 'select'){
$t.="<select name=\"$name\"$P>\n";
} elsif($$a[0] eq '/select'){
$t.="</select$P>\n";
} else { # button
$t.="<button name=\"$name\" value=\"$$a[1]\">\n";
}
}
}
$t.="</form>\n";
return $t;
}
################################################################
=pod
=head1 - Function: LW::forms_parse_html (INTERNAL)
Params: $TAG, \%elements, \$html_data, $offset, $len
Return: nothing
This is an &html_find_tags callback used to parse HTML into form hashes.
You should not call this directly, but instead use &LW::forms_read.
=cut
{ # these are private static variables for &forms_parse_html
%FORMS_ELEMENTS=( 'form'=>1, 'input'=>1,
'textarea'=>1, 'button'=>1,
'select'=>1, 'option'=>1,
'/select'=>1 );
$CURRENT_SELECT=undef;
$UNKNOWNS=0;
sub forms_parse_callback {
my ($TAG, $hr, $dr, $start, $len)=(lc(shift),@_);
my ($saveparam, $parr, $key)=(0,undef,'');
# fastpath shortcut
return undef if(!defined $FORMS_ELEMENTS{$TAG});
LW::utils_lowercase_hashkeys($hr) if(scalar %$hr);
if($TAG eq 'form'){
if(scalar %LW::forms_current){ # save last form
my %DUP=%LW::forms_current;
push (@LW::forms_found, \%DUP);
%LW::forms_current=();
}
$LW::forms_current{"\0"}=[$$hr{name},$$hr{method},
$$hr{action},undef];
delete $$hr{'name'}; delete $$hr{'method'}; delete $$hr{'action'};
$key="\0"; $parr=\@{$LW::forms_current{"\0"}};
$UNKNOWNS=0;
} elsif($TAG eq 'input'){
$$hr{type}='text' if(!defined $$hr{type});
$$hr{name}='unknown'.$UNKNOWNS++ if(!defined $$hr{name});
$key=$$hr{name};
push( @{$LW::forms_current{$key}},
(['input-'.$$hr{type},$$hr{value},undef]) );
delete $$hr{'name'}; delete $$hr{'type'}; delete $$hr{'value'};
$parr=\@{$LW::forms_current{$key}->[-1]};
} elsif($TAG eq 'select'){
$$hr{name}='unknown'.$UNKNOWNS++ if(!defined $$hr{name});
$key=$$hr{name};
push( @{$LW::forms_current{$key}}, (['select',undef,undef]) );
$parr=\@{$LW::forms_current{$key}->[-1]};
$CURRENT_SELECT=$key;
delete $$hr{name};
} elsif($TAG eq '/select'){
push( @{$LW::forms_current{$CURRENT_SELECT}},
(['/select',undef,undef]) );
$CURRENT_SELECT=undef;
return undef;
} elsif($TAG eq 'option'){
return undef if(!defined $CURRENT_SELECT);
if(!defined $$hr{value}){
my $stop=index($$dr,'<',$start+$len);
return undef if($stop==-1); # MAJOR PUKE
$$hr{value}=substr($$dr,$start+$len,
($stop-$start-$len));
$$hr{value}=~tr/\r\n//d;
}
push( @{$LW::forms_current{$CURRENT_SELECT}},
(['option',$$hr{value},undef]) );
delete $$hr{value};
$parr=\@{$LW::forms_current{$CURRENT_SELECT}->[-1]};
} elsif($TAG eq 'textarea'){
my $stop=$start+$len;
# find closing </textarea> tag
do { $stop=index($$dr,'</',$stop+2);
return undef if($stop==-1); # MAJOR PUKE
} while( lc(substr($$dr,$stop+2,8)) ne 'textarea');
$$hr{value}=substr($$dr,$start+$len,($stop-$start-$len));
$$hr{name}='unknown'.$UNKNOWNS++ if(!defined $$hr{name});
$key=$$hr{name};
push( @{$LW::forms_current{$key}},
(['textarea',$$hr{value},undef]) );
$parr=\@{$LW::forms_current{$key}->[-1]};
delete $$hr{'name'}; delete $$hr{'value'};
} else { # button
$$hr{name}='unknown'.$UNKNOWNS++ if(!defined $$hr{name});
$key=$$hr{name};
push( @{$LW::forms_current{$key}},
(['button',$$hr{value},undef]) );
}
if(scalar %$hr){
my @params=();
foreach $k (keys %$hr){
if(defined $$hr{$k}){
push @params, "$k=\"$$hr{$k}\"";
} else { push @params, $k; }
}
$$parr[2]=\@params;
}
return undef;
}}
=pod
=head1 ++ Sub package: html
The html sub package implements a simple HTML parser.
=cut
################################################################
=pod
=head1 - Function: LW::html_find_tags
Params: \$data, \&callback_function [, $xml_flag]
Return: nothing
LW::html_find_tags parses a piece of HTML and 'extracts' all found tags,
passing the info to the given callback function. The callback function
must accept two parameters: the current tag (as a scalar), and a hash ref
of all the tag's elements. For example, the tag <a href="/file"> will
pass 'a' as the current tag, and a hash reference which contains
{'href'}="/file".
The xml_flag, when set, causes the parser to do some extra processing
and checks to accomodate XML style tags such as <tag foo="bar"/>.
Notice: this function is slow! And using it to rewrite (via passback) is
slower! Make sure you have LW::bin installed to get the faster binary
version.
=cut
sub html_find_tags {
# use faster binary helper
goto &LW::bin::html_find_tags
if(defined $LW::available{'lw::bin'});
my ($dataref, $callbackfunc, $xml)=@_;
return if(!(defined $dataref && ref($dataref) ));
return if(!(defined $callbackfunc && ref($callbackfunc)));
$xml||=0;
my ($CURTAG, $ELEMENT, $VALUE, $c, $cc);
my ($INCOMMENT,$INTAG,$INSCRIPT,$INCDATA)=(0,0,0,0);
my (%TAG, $ret, $start, $tagstart, $commstart, $scriptstart, $x);
# YES, this looks like C. In fact, it's my C version ported to
# perl. But it's faster and more dependable than any regex mess
# someone could come up with.
my $LEN = length($$dataref);
for ($c=0; $c<$LEN; $c++){
$cc=substr($$dataref,$c,1);
next if(!$INCOMMENT && !$INTAG && !$INSCRIPT && $cc ne '>' && $cc ne '<');
if($cc eq '<'){
if($INSCRIPT){
if(lc(substr($$dataref,$c+1,7)) eq '/script'){
$INSCRIPT=0;
$TAG{'='}=substr($$dataref, $scriptstart,
$c - $scriptstart - 1);
} else { next; }
}
next if($INCDATA); # skip tags in xml CDATA section
if(substr($$dataref,$c+1,3) eq '!--'){
$INCOMMENT=1; $commstart=$c; $c+=3;
$INCDATA++ if($xml&&substr($$dataref,$c+1,8) eq '![CDATA[');
} else {
$INTAG=1; $c++;
$c++ while(substr($$dataref,$c,1)=~tr/< \t\r\n//);
$tagstart=$c-1;
$CURTAG='';
while(($x=substr($$dataref,$c,1))!~tr/ \t\r\n>=// &&
$c < $LEN){
$CURTAG.=$x; $c++;}
chop $CURTAG if($xml && substr($CURTAG,-1,1) eq '/');
$c++ if($x ne '>');
$INSCRIPT=1 if($CURTAG eq 'script' && !$xml);
}
$cc=substr($$dataref,$c,1); # refresh current char (cc)
}
if($cc eq '>'){
if($INSCRIPT){
if($CURTAG eq 'script'){
$scriptstart = $c + 1;
} else { next; }
}
if($INCDATA && substr($$dataref,$c-2,2) eq ']]'){
$INCDATA=0;
next;
}
if(!$INCOMMENT && $INTAG){
$INTAG=0;
$TAG{'/'}++ if($xml&&substr($$dataref,$c-1,1) eq '/');
$ret=&$callbackfunc($CURTAG,\%TAG, $dataref,
$tagstart, $c-$tagstart+1);
$c+=$ret if(defined $ret && $ret != 0);
$CURTAG='';
%TAG=();
}
if($INCOMMENT && substr($$dataref,$c-2,2) eq '--'){
$INCOMMENT=0;
$TAG{'='}=substr($$dataref,$commstart+4,
$c-$commstart-3);
$ret=&$callbackfunc('!--',\%TAG, $dataref,
$commstart, $c-$commstart+1);
$c+=$ret if(defined $ret && $ret != 0);
delete $TAG{'='};
next;
}
}
next if($INCOMMENT);
if($INTAG){
$ELEMENT=''; $VALUE='';
# eat whitespace
$c++ while(substr($$dataref,$c,1)=~tr/ \t\r\n//);
$start=$c;
$c++ while(substr($$dataref,$c,1)!~tr/ \t\r\n=\>// && $c<$LEN);
if($c > $start){
$ELEMENT=substr($$dataref,$start,$c-$start);
chop $ELEMENT if($xml&&substr($ELEMENT,-1,1) eq '/');
}
if(substr($$dataref,$c,1) ne '>'){
# eat whitespace
$c++ while(substr($$dataref,$c,1)=~tr/ \t\r\n//);
if(substr($$dataref,$c,1) eq '='){
$c++;
$start=$c;
my $p = substr($$dataref,$c,1);
if($p eq '"' || $p eq '\''){
$c++; $start++;
$c++ while(substr($$dataref,$c,1) ne $p &&
$c < $LEN);
$VALUE=substr($$dataref,$start,$c-$start);
$c++;
} else {
$c++ while(substr($$dataref,$c,1)!~tr/ \t\r\n\>// &&
$c < $LEN);
$VALUE=substr($$dataref,$start,$c-$start);
chop $VALUE if($xml&&substr($$dataref,$c-1,2) eq '/>');
}
# eat whitespace
$c++ while(substr($$dataref,$c,1)=~tr/ \t\r\n//);
}
} # if $c ne '>'
$c--;
$TAG{$ELEMENT}=$VALUE if($ELEMENT ne '' && ($xml && $ELEMENT ne '/'));
}
}}
################################################################
=pod
=head1 ++ Sub package: http
The http package is the core package of libwhisker. It is responsible
for making the HTTP requests, and parsing the responses. It can handle
HTTP 0.9, 1.0, and 1.1 requests, and allows pretty much every aspect of
the request to be configured and controlled. The HTTP functions use a
HTTP in/out hash, which is a normal perl hash. For outgoing HTTP requests
('hin' hashes), the keys/values represent outgoing HTTP headers. For HTTP
responses ('hout' hashes), the keys/values represent incoming HTTP
headers. For both, however, there is a special key, 'whisker', whose
value is a hash reference. The whisker control hash contains more
configuration variables, which include host, port, and uri of the desired
request. To access the whisker control hash, use the following
notation: $hash{'whisker'}->{'key'}='value';
You should view LW::http_init_request() for a list of core whisker control
hash values.
The internals of the http subpackage will be rewritten shortly--the
current implementation is gross and not very good style. Note that the
API will be unaffected; it will only be an internal reordering. All
references/uses of $$Z will be cleaned up to be more practical/eliminated.
=cut
##################################################################
=pod
=head1 - Function: LW::http_init_request
Params: \%request_hash_to_initialize
Return: Nothing (modifies input hash)
Sets default values to the input hash for use. Sets the host to
'localhost', port 80, request URI '/', using HTTP 1.1 with GET
method. The timeout is set to 10 seconds, no proxies are defined, and all
URI formatting is set to standard HTTP syntax. It also sets the
Connection (Keep-Alive) and User-Agent headers.
NOTICE!! It's important to use http_init_request before calling
http_do_request, or http_do_request might puke. Thus, a special magic
value is placed in the hash to let http_do_request know that the hash has
been properly initialized. If you really must 'roll your own' and not use
http_init_request before you call http_do_request, you will at least need
to set the INITIAL_MAGIC value (amongst other things).
=cut
sub http_init_request { # doesn't return anything
my ($hin)=shift;
return if(!(defined $hin && ref($hin)));
%$hin=(); # clear control hash
# control values
$$hin{'whisker'}={
req_spacer => ' ',
req_spacer2 => ' ',
http_ver => '1.1',
method => 'GET',
method_postfix => '',
port => 80,
uri => '/',
uri_prefix => '',
uri_postfix => '',
uri_param_sep => '?',
host => 'localhost',
http_req_trailer => '',
timeout => 10,
include_host_in_uri => 0,
ignore_duplicate_headers=> 1,
normalize_incoming_headers => 1,
lowercase_incoming_headers => 0,
ssl => 0,
http_eol => "\x0d\x0a",
force_close => 0,
force_open => 0,
retry => 1,
trailing_slurp => 0,
force_bodysnatch => 0,
INITIAL_MAGIC => 31337
};
# default header values
$$hin{'Connection'}='Keep-Alive'; # notice it is now default!
$$hin{'User-Agent'}="libwhisker/$LW::VERSION"; # heh
}
##################################################################
=pod
=head1 - Function: LW::http_do_request
Params: \%request, \%response [, \%configs]
Return: >=1 if error; 0 if no error (also modifies response hash)
*THE* core function of libwhisker. LW::http_do_request actually performs
the HTTP request, using the values submitted in %request, and placing result
values in %response. This allows you to resubmit %request in subsequent
requests (%response is automatically cleared upon execution). You can
submit 'runtime' config directives as %configs, which will be spliced into
$hin{'whisker'}->{} before anything else. That means you can do:
LW::http_do_request(\%req,\%resp,{'uri'=>'/cgi-bin/'});
This will set $req{'whisker'}->{'uri'}='/cgi-bin/' before execution, and
provides a simple shortcut (note: it does modify %req).
This function will also retry any requests that bomb out during the
transaction (but not during the connecting phase). This is controlled
by the {whisker}->{retry} value. Also note that the returned error
message in resp is the *last* error received. All retry errors are
put into {whisker}->{retry_errors}, which is an anonymous array.
Also note that all NTLM auth logic is implemented in http_do_request().
NTLM requires multiple requests in order to work correctly, and so this
function attempts to wrap that and make it all transparent, so that the
final end result is what's passed to the application.
This function will return 0 on success, 1 on HTTP protocol error, and 2
on non-recoverable network connection error (you can retry error 1, but
error 2 means that the server is totally unreachable and there's no
point in retrying).
=cut
sub http_do_request {
my @params = @_;
my $retry_count = ${$params[0]}{'whisker'}->{'retry'} || 0;
my ($ret, @retry_errors, $auth);
return 1 if(!(defined $params[0] && ref($params[0])));
return 1 if(!(defined $params[1] && ref($params[1])));
if(defined $params[2]){
foreach (keys %{$params[2]}){
${$params[0]}{'whisker'}->{$_}=${$params[2]}{$_};}}
$auth=$params[0]->{'Authorization'} if(defined $params[0]->{'Authorization'});
do {
if(defined $auth && $auth=~/^NTLM/){
$ret=0;
if($params[0]->{'whisker'}->{'ntlm_step'}==0){
$ret=LW::http_do_request_ex($params[0],$params[1]);
return 2 if($ret==2);
if($ret==0){
return 0 if($params[1]->{'whisker'}->{'code'} == 200);
return 1 if($params[1]->{'whisker'}->{'code'} != 401);
$params[0]->{'whisker'}->{'ntlm_step'}=1;
my $thead=utils_find_lowercase_key($params[1],'www-authenticate');
return 1 if(!defined $thead);
return 1 if($thead!~m/^NTLM (.+)$/);
$params[0]->{'Authorization'}='NTLM '.ntlm_client(
$params[0]->{'whisker'}->{'ntlm_obj'},$1);
}
}
if($ret==0){
delete $params[0]->{'Authorization'}
if($params[0]->{'whisker'}->{'ntlm_step'}>1);
$ret=LW::http_do_request_ex($params[0],$params[1]);
$params[0]->{'Authorization'}=$auth;
if($ret>0){ $params[0]->{'whisker'}->{'ntlm_step'}=0;
} else { $params[0]->{'whisker'}->{'ntlm_step'}=2; }
return $ret if($ret==2||$ret==0);
}
} else {
$ret=LW::http_do_request_ex($params[0],$params[1]);
push @{${$params[1]}{'whisker'}->{'retry_errors'}},
@retry_errors if scalar(@retry_errors);
return $ret if($ret==0 || $ret==2);
}
push @retry_errors, ${$params[1]}{'whisker'}->{'error'};
$retry_count--;
} while( $retry_count >= 0);
# if we get here, we still had errors, but no more retries
return 1;
}
##################################################################
=pod
=head1 - Function: LW::http_do_request_ex
Params: \%req, \%resp, \%configs
Return: >=1 if error; 0 if no error
NOTE: you should go through http_do_request(), which calls this function.
This function actually does all the request work. It is called by
http_do_request(), which has a 'retry wrapper' built into it to catch
errors.
=cut
sub http_do_request_ex {
my ($hin, $hout, $hashref)=@_;
my ($temp,$vin,$resp,$S,$a,$b,$vout,@c,$c,$res)=(1,'');
my $W; # shorthand alias for the {'whisker'} hash
return 1 if(!(defined $hin && ref($hin) ));
return 1 if(!(defined $hout && ref($hout)));
%$hout=(); # clear output hash
$$hout{whisker}->{uri}=$$hin{whisker}->{uri}; # for tracking purposes
$$hout{whisker}->{'INITIAL_MAGIC'}=31338; # we can tell requests from responses
if($LW::LW_HAS_SOCKET==0){
$$hout{'whisker'}->{'error'}='Socket support not available';
return 2;}
if(!defined $$hin{'whisker'} ||
!defined $$hin{'whisker'}->{'INITIAL_MAGIC'} ||
$$hin{'whisker'}->{'INITIAL_MAGIC'}!=31337 ){
$$hout{'whisker'}->{'error'}='Input hash not initialized';
return 2;
}
if(defined $hashref){
foreach (keys %$hashref){
$$hin{'whisker'}->{$_}=$$hashref{$_};}}
# if we want anti-IDS, make a copy and setup new values
if(defined $$hin{'whisker'}->{'anti_ids'}){
my %copy=%{$hin};
anti_ids(\%copy,$$hin{'whisker'}->{'anti_ids'});
$W = $copy{'whisker'};
} else {
$W = $$hin{'whisker'};
}
if($$W{'ssl'}>0 && $LW::LW_HAS_SSL!=1){
$$hout{'whisker'}->{'error'}='SSL not available';
return 2;}
$TIMEOUT=$$W{'timeout'}||10;
my $cache_key = defined $$W{'proxy_host'} ?
join(':',$$W{'proxy_host'},$$W{'proxy_port'}) :
join(':',$$W{'host'},$$W{'port'});
if(!defined $http_host_cache{$cache_key}){
# make new entry
push(@{$http_host_cache{$cache_key}},
undef, # SOCKET $$Z[0]
0, # $SOCKSTATE $$Z[1]
undef, # INET_ATON $$Z[2]
undef, # $SSL_CTX $$Z[3]
undef, # $SSL_THINGY $$Z[4]
'', # $OUTGOING_QUEUE $$Z[5]
'', # $INCOMING_QUEUE $$Z[6]
0, # $STATS_SYNS $$Z[7]
0, # $STATS_REQS $$Z[8]
undef ) # SSL session ID $$Z[9]
}
# NOTE: the 'Z' reference will be going away in future versions...
$Z = $http_host_cache{$cache_key};
# use $chost/$cport for actual server we are connecting to
my ($chost,$cport,$cwhat,$PROXY)=('',80,'',0);
if(defined $$W{'proxy_host'}){
$chost=$$W{'proxy_host'};
$cport=$$W{'proxy_port'}||80;
$cwhat='proxy';
$PROXY=1;
if($$W{'ssl'}>0 && $LW::LW_SSL_LIB==2){
$ENV{HTTPS_PROXY} ="$$W{'proxy_host'}:";
$ENV{HTTPS_PROXY}.=$$W{'proxy_port'}||80; }
} else {
$chost=$$W{'host'};
$cport=$$W{'port'};
$cwhat='host';
}
if($$Z[1]>0){ # check to see if socket is still alive
if(! sock_valid($Z,$hin,$hout) ){
$$Z[1]=0;
sock_close($$Z[0],$$Z[4]);
} }
# technically we have a race condition: socket can go
# bad before we send request, below. But that's ok,
# we handle the errors down there.
if($$Z[1]==0){
my $SOCK = _newsym();
if(defined $$W{'UDP'} && $$W{'UDP'}>0){
if(!socket($SOCK,PF_INET,SOCK_DGRAM,getprotobyname('udp')||0)){
$$hout{'whisker'}->{'error'}='Socket() problems (UDP)';
return 2;}
} else {
if(!socket($SOCK,PF_INET,SOCK_STREAM,getprotobyname('tcp')||0)){
$$hout{'whisker'}->{'error'}='Socket() problems';
return 2;}
}
$$Z[0]=$SOCK;
if(defined $$W{'bind_socket'}){
my $port=$$W{'bind_port'}||14011;
my $addr;
if(defined $$W{'bind_addr'}){
$addr=inet_aton($$W{'bind_addr'});
} else {
$addr=INADDR_ANY;
}
if(!bind($SOCK, sockaddr_in($port,$addr))){
$$hout{'whisker'}->{'error'}='Bind() on socket failed';
return 2;
}
}
$$Z[5]=$$Z[6]=''; # flush in/out queues
if($$W{'ssl'}>0){ # ssl setup stuff
if($LW::LW_SSL_LIB==1){
if(!defined($$Z[3])){
if(! ($$Z[3] = Net::SSLeay::CTX_new()) ){
$$hout{'whisker'}->{'error'}="SSL_CTX error: $!";
return 2;}
if(defined $$W{'ssl_rsacertfile'}){
if(! (Net::SSLeay::CTX_use_RSAPrivateKey_file($$Z[3],
$$W{'ssl_rsacertfile'},
&Net::SSLeay::FILETYPE_PEM))){
$$hout{'whisker'}->{'error'}="SSL_CTX_use_rsacert error: $!";
return 2;}
}
if(defined $$W{'ssl_certfile'}){
if(! (Net::SSLeay::CTX_use_certificate_file($$Z[3],
$$W{'ssl_certfile'},
&Net::SSLeay::FILETYPE_PEM))){
$$hout{'whisker'}->{'error'}="SSL_CTX_use_cert error: $!";
return 2;}
}
}
if(! ($$Z[4] = Net::SSLeay::new($$Z[3])) ){
$$hout{'whisker'}->{'error'}="SSL_new error: $!";
return 2;}
if(defined $$W{'ssl_ciphers'}){
if(!(Net::SSLeay::set_cipher_list($$Z[4],
$$W{'ssl_ciphers'}))){
$$hout{'whisker'}->{'error'}="SSL_set_ciphers error: $!";
return 2;}
}
}
}
$$Z[2]=inet_aton($chost) if(!defined $$Z[2]);
if(!defined $$Z[2]){ # can't find hostname
$$hout{'whisker'}->{'error'}="Can't resolve hostname";
return 2;
}
if($$W{'ssl'}>0 && $LW::LW_SSL_LIB==2){
# proxy set in ENV; we always connect to host
$$Z[4]= Net::SSL->new(
PeerAddr => $$hin{'whisker'}->{'host'},
PeerPort => $$hin{'whisker'}->{'port'},
Timeout => $TIMEOUT );
if($@){ $$hout{'whisker'}->{'error'}="Can't connect via SSL: $@[0]";
return 2;}
$$Z[4]->autoflush(1);
} else {
if($LW::LW_NONBLOCK_CONNECT){
my $flags=fcntl($$Z[0],F_GETFL,0);
$flags |= O_NONBLOCK; # set nonblock flag
if(!(fcntl($$Z[0],F_SETFL,$flags))){ # error setting flag
$LW::LW_NONBLOCK_CONNECT=0; # revert to normal
} else {
my $R=connect($$Z[0],sockaddr_in($cport,$$Z[2]));
if(!$R){ # we didn't connect...
if($! != EINPROGRESS){
close($$Z[0]);
$$Z[0]=undef; # this is a bad socket
$$hout{'whisker'}->{'error'}="Can't connect to $cwhat";
return 2;}
vec($vin,fileno($$Z[0]),1)=1;
if(!select(undef,$vin,undef,$TIMEOUT) || !getpeername($$Z[0])){
close($$Z[0]);
$$Z[0]=undef; # this is a bad socket
$$hout{'whisker'}->{'error'}="Can't connect to $cwhat";
return 2;
} }
$flags &= ~O_NONBLOCK; # clear nonblock flag
if(!(fcntl($$Z[0],F_SETFL,$flags))){ # not good!
close($$Z[0]);
$LW::LW_NONBLOCK_CONNECT=0;
$$Z[0]=undef;
$$hout{'whisker'}->{'error'}="Error setting socket to block";
return 2;
} }
}
if(!defined $$Z[0]){ # this is a safety catch
$$hout{'whisker'}->{'error'}="Error creating valid socket connection";
return 2; }
if($LW::LW_NONBLOCK_CONNECT==0){ # attempt to do a timeout alarm...
eval {
local $SIG{ALRM} = sub { die "timeout\n" };
eval {alarm($TIMEOUT)};
if(!connect($$Z[0],sockaddr_in($cport,$$Z[2]))){
alarm(0);
die("no_connect\n"); }
eval {alarm(0)};
};
if($@ || !(defined $$Z[0])){
$$hout{'whisker'}->{'error'}="Can't connect to $cwhat";
return 2;
} }
binmode($$Z[0]); # stupid Windows
# same as IO::Handle->autoflush(1), without importing 1000+ lines
my $S=select($$Z[0]);
$|++; select($S);
}
$$Z[1]=1; $$Z[7]++;
if($$W{'ssl'}>0){
if($LW::LW_SSL_LIB==1){
if($PROXY){ # handle the proxy CONNECT stuff...
my $SSL_CONNECT = "CONNECT $$W{'host'}".
":$$W{'port'}/ HTTP/1.0\n\n";
syswrite($$Z[0],$SSL_CONNECT, length($SSL_CONNECT)); }
Net::SSLeay::set_fd($$Z[4], fileno($$Z[0]));
Net::SSLeay::set_session($$Z[4],$$Z[9]) if(defined $$Z[9]);
if(! (Net::SSLeay::connect($$Z[4])) ){
$$hout{'whisker'}->{'error'}="SSL_connect error: $!";
sock_close($$Z[0],$$Z[4]); return 2;}
if(defined $$W{'save_ssl_info'} &&
$$W{'save_ssl_info'}>0){
ssl_save_info($hout,$$Z[4]); }
my $x=Net::SSLeay::ctrl($$Z[4],6,0,'');
$$Z[9]=Net::SSLeay::get_session($$Z[4]) unless(defined $$W{'ssl_resume'} &&
$$W{'ssl_resume'}==0);
}
} else {
$$Z[4]=undef;
}
}
if(defined $$W{'ids_session_splice'} &&
$$W{'ids_session_splice'}>0 &&
$$W{'ssl'}==0){ # no session_spice over ssl
setsockopt($$Z[0],SOL_SOCKET,SO_SNDLOWAT,1);
@c=split(//, &http_req2line($hin));
# notice we bypass queueing here, in order to trickle the packets
my $ss;
foreach $c (@c){
$ss=syswrite($$Z[0],$c,1); # char size assumed to be 1
if(!defined $ss || $ss==0){
$$hout{'whisker'}->{'error'}="Error sending session splice request to server";
sock_close($$Z[0],$$Z[4]); return 1;
}
select(undef,undef,undef,.1);
}
} else {
http_queue(http_req2line($hin)); }
$$Z[8]++;
if($$W{'http_ver'} ne '0.9'){
my %SENT;
if(defined $$W{'header_order'} && ref($$W{'header_order'})){
foreach (@{$$W{'header_order'}}){
next if($_ eq '' || $_ eq 'whisker');
if(ref($$hin{$_})){
$SENT{$_}||=0;
my $v=$$hin{$_}->[$SENT{$_}];
http_queue("$_: $v$$W{'http_eol'}");
} else {
http_queue("$_: $$hin{$_}$$W{'http_eol'}");
}
$SENT{$_}++;
}
}
foreach (keys %$hin){
next if($_ eq '' || $_ eq 'whisker');
next if(defined $SENT{$_});
if(ref($$hin{$_})){ # header with multiple values
my $key=$_;
foreach (@{$$hin{$key}}){
http_queue("$key: $_$$W{'http_eol'}");}
} else { # normal header
http_queue("$_: $$hin{$_}$$W{'http_eol'}");
}
}
if(defined $$W{'raw_header_data'}){
http_queue($$W{'raw_header_data'});}
http_queue($$W{'http_eol'});
if(defined $$W{'data'}){
http_queue($$W{'data'});}
} # http 0.9 support
# take a MD5 of queue, if wanted
if(defined $$W{'queue_md5'}){
$$hout{'whisker'}->{'queue_md5'}= LW::md5($$Z[5]);
}
# all data is wrangled...actually send it now
if($res=http_queue_send($$Z[0],$$Z[4])){
$$hout{'whisker'}->{'error'}="Error sending request to server: $res";
sock_close($$Z[0],$$Z[4]); $$Z[1]=0; return 1;}
undef $vin;
if(defined $$Z[4]){
if($LW::LW_SSL_LIB==1){ # Net::SSLeay
shutdown $$Z[0], 1;
vec($vin,fileno($$Z[0]),1)=1;
} else { # Net::SSL
shutdown $$Z[4], 1;
vec($vin,fileno($$Z[4]),1)=1;
}
} else {
vec($vin,fileno($$Z[0]),1)=1;
}
if(!select($vin,undef,undef,$TIMEOUT)){
$$hout{'whisker'}->{'error'}="Server read timed out";
sock_close($$Z[0],$$Z[4]); $$Z[1]=0; return 1;}
my ($LC,$CL,$TE,$CO)=('',-1,'',''); # extra header stuff
$$hout{'whisker'}->{'lowercase_incoming_headers'} =
$$W{'lowercase_incoming_headers'};
if($$W{'http_ver'} ne '0.9'){
do { # catch '100 Continue' responses
$resp=sock_getline($$Z[0],$$Z[4]);
#$resp=~tr/\r\n//d if(defined $resp);
if(!defined $resp){
$$hout{'whisker'}->{'error'}='Error reading HTTP response';
if($!){ # this should be left over from sysread via sock_getline
$$hout{'whisker'}->{'error'}.=": $!"; }
$$hout{'whisker'}->{'data'}=$$Z[6];
sock_close($$Z[0],$$Z[4]); $$Z[1]=0; # otherwise bad crap lingers
return 1;}
if(defined $$W{'save_raw_headers'}){
$$hout{'whisker'}->{'raw_header_data'}.=$resp;}
if($resp!~/^HTTP\/([0-9.]{3})[ \t]+(\d+)[ \t]{0,1}(.*?)[\r\n]+/){
$$hout{'whisker'}->{'error'}="Invalid HTTP response: $resp";
# let's save the incoming data...we might want it
$$hout{'whisker'}->{'data'}=$resp;
while(defined ($_=sock_getline($$Z[0],$$Z[4]))){
$$hout{'whisker'}->{'data'}.=$_;}
# normally we'd check the results to see if socket is closed, but
# we close it anyway, so it doesn't matter
sock_close($$Z[0],$$Z[4]); $$Z[1]=0; # otherwise bad crap lingers
return 1;}
$$hout{'whisker'}->{'http_ver'} = $1;
$$hout{'whisker'}->{'http_resp'} = $2;
$$hout{'whisker'}->{'http_resp_message'}= $3;
$$hout{'whisker'}->{'code'} = $2;
$$hout{'whisker'}->{'100_continue'}++ if($2 == 100);
while(defined ($_=sock_getline($$Z[0],$$Z[4]))){ # check pertinent headers
if(defined $$W{'save_raw_headers'}){
$$hout{'whisker'}->{'raw_header_data'}.=$_;}
$_=~s/[\r]{0,1}\n$//; # anchored regex, so it's fast
last if ($_ eq ''); # acceptable assumption case?
my $l2=index($_,':'); # this is faster than regex
$a=substr($_,0,$l2);
$b=substr($_,$l2+1);
$b=~s/^([ \t]*)//; # anchored regex, so it's fast
$hout{'whisker'}->{'abnormal_header_spacing'}++ if($1 ne ' ');
$LC = lc($a);
next if($LC eq 'whisker');
$TE = lc($b) if($LC eq 'transfer-encoding');
$CL = $b if($LC eq 'content-length');
$CO = lc($b) if($LC eq 'connection');
if($$W{'lowercase_incoming_headers'}>0){
$a=$LC;
} elsif($$W{'normalize_incoming_headers'}>0){
$a=~s/(-[a-z])/uc($1)/eg;
}
# save the received header order, in case we're curious
push(@{$$hout{'whisker'}->{'recv_header_order'}},$a);
if(defined $$hout{$a} && $$W{'ignore_duplicate_headers'}!=1){
if(!ref($$hout{$a})){
my $temp=$$hout{$a};
delete $$hout{$a};
push(@{$$hout{$a}},$temp);
}
push(@{$$hout{$a}},$b);
} else {
$$hout{$a}=$b;
} }
# did we have a socket error?
if($!){
$hout{'whisker'}->{'error'}='Error in reading response/headers';
sock_close($$Z[0],$$Z[4]); $$Z[1]=0; return 1; }
if( $CO eq '' ){ # do whatever the client wanted
$CO = (defined $$hin{'Connection'}) ? lc($$hin{'Connection'}) :
'close'; }
} while($$hout{'whisker'}->{'http_resp'}==100);
} else { # http ver 0.9, we need to fake it
# Keep in mind lame broken servers, like IIS, still send headers for
# 0.9 requests; the headers are treated as data. Also keep in mind
# that if the server doesn't support HTTP 0.9 requests, it will spit
# back an HTTP 1.0 response header. User is responsible for figuring
# this out himself.
$$hout{'whisker'}->{'http_ver'}='0.9';
$$hout{'whisker'}->{'http_resp'}='200';
$$hout{'whisker'}->{'http_resp_message'}='';
}
if($$W{'force_bodysnatch'} || ( $$W{'method'} ne 'HEAD' &&
$$hout{'whisker'}->{'http_resp'}!=206 &&
$$hout{'whisker'}->{'http_resp'}!=102)){
if ($TE eq 'chunked') {
if(!defined ($a=sock_getline($$Z[0],$$Z[4]))){
$$hout{'whisker'}->{'error'}='Error reading chunked data length';
sock_close($$Z[0],$$Z[4]); $$Z[1]=0; return 1;}
$a=~tr/a-fA-F0-9//cd; $CL=hex($a);
$$hout{'whisker'}->{'data'}='';
while($CL!=0) { # chunked sucks
if(!defined ($temp=sock_get($$Z[0],$$Z[4],$CL))){
$$hout{'whisker'}->{'error'}="Error reading chunked data: $!";
sock_close($$Z[0],$$Z[4]); $$Z[1]=0; return 1;}
$$hout{'whisker'}->{'data'}=$$hout{'whisker'}->{'data'} . $temp;
$temp=sock_getline($$Z[0], $$Z[4]);
($temp=sock_getline($$Z[0], $$Z[4])) if(defined $temp &&
$temp=~/^[\r\n]*$/);
if(!defined $temp){ # this will catch errors in either sock_getline
$$hout{'whisker'}->{'error'}="Error reading chunked data: $!";
sock_close($$Z[0],$$Z[4]); $$Z[1]=0; return 1;}
$temp=~tr/a-fA-F0-9//cd; $CL=hex($temp);}
# read in trailer headers
while(defined ($_=sock_getline($$Z[0],$$Z[4]))){ tr/\r\n//d; last if($_ eq ''); }
# Hmmmm...error, but we should have full body. Don't return error
if($!){ $$Z[1]=0; sock_close($$Z[0],$$Z[4]); }
} else {
if ($CL != -1) {
if(!defined ($temp=sock_get($$Z[0],$$Z[4],$CL))){
$$hout{'whisker'}->{'error'}="Error reading data: $!";
sock_close($$Z[0],$$Z[4]); $$Z[1]=0; return 1;}
} else { # Yuck...read until server stops sending....
$temp=sock_getall($$Z[0],$$Z[4]);
# we go until we puke, so close socket and don't return error
sock_close($$Z[0],$$Z[4]); $$Z[1]=0;
}
$$hout{'whisker'}->{'data'}=$temp;
}
} # /method ne HEAD && http_resp ne 206 or 102/
if(($CO ne 'keep-alive' || ( defined $$hin{'Connection'} &&
lc($$hin{'Connection'}) eq 'close')) && $$W{'force_open'}!=1){
$$Z[1]=0; sock_close($$Z[0],$$Z[4]);
}
# this way we know what the state *would* have been...
$$hout{'whisker'}->{'sockstate'}=$$Z[1];
if($$W{'force_close'}>0) {
$$Z[1]=0; sock_close($$Z[0],$$Z[4]); }
if($$W{'ssl'}>0){ # we don't reuse SSL sockets
$$Z[1]=0; sock_close($$Z[0],$$Z[4]); }
$$hout{'whisker'}->{'stats_reqs'}=$$Z[8];
$$hout{'whisker'}->{'stats_syns'}=$$Z[7];
$$hout{'whisker'}->{'error'}=''; # no errors
return 0;
}
##################################################################
=pod
=head1 - Function: LW::http_req2line (INTERNAL)
Params: \%hin, $switch
Return: $request
req2line is used internally by LW::http_do_request, as well as provides a
convienient way to turn a %hin configuration into an actual HTTP request
line. If $switch is set to 1, then the returned $request will be the URI
only ('/requested/page.html'), versus the entire HTTP request ('GET
/requested/page.html HTTP/1.0\n\n'). Also, if the 'full_request_override'
whisker config variable is set in %hin, then it will be returned instead
of the constructed URI.
=cut
sub http_req2line {
my ($S,$hin,$UO)=('',@_);
$UO||=0; # shut up -w warning
# notice: full_request_override can play havoc with proxy settings
if(defined $$hin{'whisker'}->{'full_request_override'}){
return $$hin{'whisker'}->{'full_request_override'};
} else { # notice the components of a request--this is for flexibility
if($UO!=1){$S.= $$hin{'whisker'}->{'method'}.
$$hin{'whisker'}->{'method_postfix'}.
$$hin{'whisker'}->{'req_spacer'};
if($$hin{'whisker'}->{'include_host_in_uri'}>0){
$S.= 'http://';
if(defined $$hin{'whisker'}->{'uri_user'}){
$S.= $$hin{'whisker'}->{'uri_user'};
if(defined $$hin{'whisker'}->{'uri_password'}){
$S.= ':'.$$hin{'whisker'}->{'uri_user'};
}
$S.= '@';
}
$S.= $$hin{'whisker'}->{'host'}.
':'.$$hin{'whisker'}->{'port'};}}
$S.= $$hin{'whisker'}->{'uri_prefix'}.
$$hin{'whisker'}->{'uri'}.
$$hin{'whisker'}->{'uri_postfix'};
if(defined $$hin{'whisker'}->{'uri_param'}){
$S.= $$hin{'whisker'}->{'uri_param_sep'}.
$$hin{'whisker'}->{'uri_param'};}
if($UO!=1){ if($$hin{'whisker'}->{'http_ver'} ne '0.9'){
$S.= $$hin{'whisker'}->{'req_spacer2'}.'HTTP/'.
$$hin{'whisker'}->{'http_ver'}.
$$hin{'whisker'}->{'http_req_trailer'};}
$S.= $$hin{'whisker'}->{'http_eol'};}}
return $S;}
##################################################################
=pod
=head1 - Function LW::sock_close (INTERNAL)
Params: $socket_file_descriptor, $SSL_THINGY
Return: nothing
This function will close the indicated socket and SSL connection (if
necessary). They are wrapped in eval()s to make sure if the functions
puke, it doesn't kill the entire program.
=cut
sub sock_close {
my ($fd,$ssl)=@_;
if(defined $ssl){
if($LW::LW_SSL_LIB==1){ # Net::SSLeay
eval "&Net::SSLeay::free($ssl)";
# eval "&Net::SSLeay::CTX_free($$Z[3])";
} else { # Net::SSL
eval { close($ssl) }; # is that right for Net::SSL?
}
}
eval { close($fd); };
$$Z[4]=undef;
}
##################################################################
=pod
=head1 - Function LW::sock_valid (INTERNAL)
Params: $Z reference, \%hin, \%hout
Return: 1 if socket valid, 0 if socket disconnected
This is an internal function used to determine if a socket is
still good (i.e. the other END hasn't closed the connection).
This really only applies to persistent (Keep-Alive) connections.
This function is not intended for external use.
=cut
sub sock_valid {
my ($z,$Hin,$Hout)=@_;
my $slurp=$$Hin{'whisker'}->{'trailing_slurp'};
my ($o,$vin)=(undef,'');
return 0 if(defined $$z[3]); # we don't do SSL yet
# closed socket sets read flag (and so does waiting data)
vec($vin,fileno($$z[0]),1)=1;
if(select(($o=$vin),undef,undef,.01)){ # we have data to read
my ($hold, $res);
do {
$res = sysread($$z[0], $hold, 4096);
$$z[6].=$hold if($slurp==0); # save to queue
$$Hout{'whisker'}->{'slurped'}.="$hold\0"
if($slurp==1); # save to hout hash
# fall through value of 2 doesn't do anything
} while ($res && select(($o=$vin),undef,undef,.01));
if(!defined $res || $res==0){ # error or EOF
return 0;
}
}
return 1;
}
##################################################################
=pod
=head1 - Function: LW::sock_getline (INTERNAL)
Params: $socket_file_descriptor, $SSL_THINGY
Return: $string, undef on error (timeout)
This function is used internally to read a line of input (up to a '\n')
from the given socket file descriptor (regular or SSL).
This function is not intended for external use.
=cut
sub sock_getline { # read from socket w/ timeouts
my ($fd,$ssl) = @_;
my ($str,$t)=('','');
$t = index($$Z[6],"\n",0);
while($t < 0){
return undef if &http_queue_read($fd,$ssl);
$t=index($$Z[6],"\n",0);
}
# MEMLEAK: use following lines; comment out SPEEDUP and LEGACY lines
# my $r;
# ($r,$$Z[6])=unpack('A'.($t+1).'A*',$$Z[6]);
# return $r;
# SPEEDUP: use following line; comment out MEMLEAK and LEGACY lines
# return substr($$Z[6],0,$t+1,'');
# LEGACY: use following lines; comment out MEMLEAK and SPEEDUP lines
my $r = substr($$Z[6],0,$t+1);
substr($$Z[6],0,$t+1)='';
return $r;
}
##################################################################
=pod
=head1 - Function: LW::sock_get (INTERNAL)
Params: $socket_file_descriptor, $SSL_THINGY, required $amount
Return: $string, undef on error
This function is used internally to read input from the given socket
file descriptor (regular or SSL). Will abort/error if $amount is not
available.
This function is not intended for external use.
=cut
sub sock_get { # read from socket w/ timeouts
my ($fd,$ssl,$amount) = @_;
my ($str,$t)=('','');
while($amount > length($$Z[6])){
return undef if &http_queue_read($fd,$ssl);
}
# MEMLEAK: use following lines; comment out SPEEDUP and LEGACY lines
# my $r;
# ($r,$$Z[6])=unpack('A'.$amount.'A*',$$Z[6]);
# return $r;
# SPEEDUP: use following line; comment out MEMLEAK and LEGACY lines
# return substr($$Z[6],0,$amount,'');
# LEGACY: use following lines; comment out MEMLEAK and SPEEDUP lines
my $r = substr($$Z[6],0,$amount);
substr($$Z[6],0,$amount)='';
return $r;
}
##################################################################
=pod
=head1 - Function: LW::sock_getall (INTERNAL)
Params: $socket_file_descriptor, $SSL_THINGY
Return: $string
This function is used internally to read input from the given socket
file descriptor (regular or SSL). It will return everything received
until an error (no data or real error) occurs.
This function is not intended for external use.
=cut
sub sock_getall {
my ($fd,$ssl) = @_;
1 while( !(&http_queue_read($fd,$ssl)) );
return $$Z[6];
}
##################################################################
=pod
=head1 - Function: LW::http_queue_read (INTERNAL)
Params: $fd, $ssl
Return: $character, undef on error (timeout)
http_queue_read() will put incoming data from the server into
the incoming queue for reading. If there's no more data (or
on error), it will return 1. Otherwise it returns 0.
This function is really for internal use only.
=cut
sub http_queue_read {
my ($fd,$ssl)=@_;
my ($vin, $t)=('','');
if(defined $ssl){
if($LW::LW_SSL_LIB==1){ # Net::SSLeay
local $SIG{ALRM} = sub { die "timeout\n" };
local $SIG{PIPE} = sub { die "pipe_error\n" };
eval {
eval { alarm($TIMEOUT); };
$t=Net::SSLeay::read($ssl);
eval { alarm(0); };
};
if($@ || !defined $t || $t eq ''){
return 1;}
$$Z[6].=$t;
} else { # Net::SSL
if(!$ssl->read($t,1024)){ return 1;
} else { $$Z[6].=$t;}
}
} else {
vec($vin,fileno($fd),1)=1; # wait only so long to read...
if(!select($vin,undef,undef,$TIMEOUT)){
return 1;}
if(!sysread($fd,$t,4096)){ return 1; # EOF or error
} else { $$Z[6].=$t;}
}
return 0;
}
##################################################################
=pod
=head1 - Function: LW::http_queue_send (INTERNAL)
Params: $sock, $ssl
Return: $status_result (undef=ok, else error message)
This functions sends the current queue (made with LW::http_queue) to the
server via the specified SSL or socket connection.
=cut
sub http_queue_send { # write to socket
my ($fd,$ssl)=@_;
my ($v,$wrote,$err)=('');
my $len = length($$Z[5]);
if(defined $ssl){
if($LW::LW_SSL_LIB==1){ # Net::SSLeay
($wrote,$err)=Net::SSLeay::ssl_write_all($ssl,$$Z[5]);
return 'Could not send entire data queue' if ($wrote!=$len);
return "SSL_write error: $err" unless $wrote;
} else { # Net::SSL
$ssl->print($$Z[5]);
}
} else {
vec($v,fileno($fd),1)=1;
if(!select(undef,$v,undef,.01)){
return 'Socket write test failed'; }
$wrote=syswrite($fd,$$Z[5],length($$Z[5]));
return "Error sending data queue: $!" if(!defined $wrote);
return 'Could not send entire data queue' if ($wrote != $len);
}
$$Z[5]=''; return undef;
}
##################################################################
=pod
=head1 - Function: LW::http_queue (INTERNAL)
Params: $data
Return: nothing
This function will buffer the output to be sent to the server. Output is
buffered for various reasons (particularlly because of SSL, but also
allowing the chance to 'go back' and modify the final output before it's
actually sent (after header constructions, etc).
=cut
sub http_queue {
$$Z[5].= shift;
}
##################################################################
=pod
=head1 - Function: LW::http_fixup_request
Params: $hash_ref
Return: Nothing
This function takes a %hin hash reference and makes sure the proper
headers exist (for example, it will add the Host: header, calculate the
Content-Length: header for POST requests, etc). For standard requests
(i.e. you want the request to be HTTP RFC-compliant), you should call this
function right before you call LW::http_do_request.
=cut
sub http_fixup_request {
my $hin=shift;
return if(!(defined $hin && ref($hin)));
if($$hin{'whisker'}->{'http_ver'} eq '1.1'){
$$hin{'Host'}=$$hin{'whisker'}->{'host'} if(!defined $$hin{'Host'});
$$hin{'Connection'}='Keep-Alive' if(!defined $$hin{'Connection'});
}
if(defined $$hin{'whisker'}->{'data'}){
if(!defined $$hin{'Content-Length'}){
$$hin{'Content-Length'}=length($$hin{'whisker'}->{'data'});}
# if(!defined $$hin{'Content-Encoding'}){
# $$hin{'Content-Encoding'}='application/x-www-form-urlencoded';}
}
if(defined $$hin{'whisker'}->{'proxy_host'}){
$$hin{'whisker'}->{'include_host_in_uri'}=1;}
}
##################################################################
=pod
=head1 - Function: LW::http_reset
Params: Nothing
Return: Nothing
The LW::http_reset function will walk through the %http_host_cache,
closing all open sockets and freeing SSL resources. It also clears
out the host cache in case you need to rerun everything fresh.
=cut
sub http_reset {
my $key;
foreach $key (keys %http_host_cache){
# *Z=$http_host_cache{$key};
sock_close($http_host_cache{$key}->[0],
$http_host_cache{$key}->[4]);
my $x=$http_host_cache{$key}->[3];
if(defined $x && $LW::LW_SSL_LIB==1){
eval "Net::SSLeay::CTX_free($x)"; }
delete $http_host_cache{$key};
}
}
##################################################################
=pod
=head1 - Function: LW::ssl_save_info (INTERNAL)
Params: \%hout, $ssl_connection
Return: Nothing
This is an internal function used to save various Net::SSLeay
information into the given hash. Triggered by setting
{'whisker'}->{'save_ssl_info'}=1.
=cut
sub ssl_save_info {
my ($hr,$SSL)=@_;
my $cert;
return if($LW::LW_SSL_LIB!=1); # only Net::SSLeay used
$$hr{'whisker'}->{'ssl_cipher'}=Net::SSLeay::get_cipher($SSL);
if( $cert = Net::SSLeay::get_peer_certificate($SSL)){
$$hr{'whisker'}->{'ssl_cert_subject'} =
Net::SSLeay::X509_NAME_oneline(
Net::SSLeay::X509_get_subject_name($cert) );
$$hr{'whisker'}->{'ssl_cert_issuer'} =
Net::SSLeay::X509_NAME_oneline(
Net::SSLeay::X509_get_issuer_name($cert) );
}
}
##################################################################
{ $SYMCOUNT = 0;
sub _newsym { # same as Symbol::gensym; taken from libwhisker2
my $pkg="LW::";
my $name = "_STREAM_" . $SYMCOUNT++;
delete $$pkg{$name};
return \*{$pkg.$name};
}}
##################################################################
=pod
=head1 ++ Sub package: mdx
The mdx subpackage contains support for making MD4 and MD5 hashes of the
given data. It will attempt to use a faster perl module if installed,
and will fall back on the internal perl version (which is *slow* in
comparison) if nothing else was found.
This was written in a few hours using the explanation of Applied
Cryptography as the main reference, and Digest::Perl::MD5 as a secondary
reference. MD4 was later added, using Authen::NTLM::MD4 as a reference.
This code should be cross-platform (particularly 64-bit) compatible; if
you get errors, contact rfp@wiretrip.net.
=cut
########################################################################
{ # start md5 packaged varbs
my (@S,@T,@M);
my $code='';
=pod
=head1 - Function: LW::md5
Params: $data
Return: $hex_md5_string
This function takes a data scalar, and composes a MD5 hash of it, and
returns it in a hex ascii string. It will use the fastest MD5 function
available.
=cut
sub md5 {
return undef if(!defined $_[0]); # oops, forgot the data
return MD5->hexhash($_[0]) if(defined $LW::available{'md5'});
return md5_perl($_[0]);
}
########################################################################
=pod
=head1 - Function: LW::md5_perl
Params: $data
Return: $hex_md5_string
This is the perl implementation of the MD5 function. You should use
the md5() function, which will call this function as a last resort.
You can call this function directly if you want to test the code.
=cut
sub md5_perl {
my $DATA=shift;
$DATA=md5_pad($DATA);
&md5_init() if(!defined $M[0]);
return md5_perl_generated(\$DATA);
}
########################################################################
=pod
=head1 - Function: LW::md5_init (INTERNAL)
Params: nothing
Return: nothing
This function generates particular values used in the md5_perl function.
Normally you do not have to call it, as md5_perl will call it if needed.
The values here are special MD5 constants.
=cut
sub md5_init {
return if(defined $S[0]);
for(my $i=1; $i<=64; $i++){ $T[$i-1]=int((2**32)*abs(sin($i))); }
my @t=(7,12,17,22,5,9,14,20,4,11,16,23,6,10,15,21);
for($i=0; $i<64; $i++){ $S[$i]=$t[(int($i/16)*4)+($i%4)]; }
@M=( 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
1,6,11,0,5,10,15,4,9,14,3,8,13,2,7,12,
5,8,11,14,1,4,7,10,13,0,3,6,9,12,15,2,
0,7,14,5,12,3,10,1,8,15,6,13,4,11,2,9 );
&md5_generate();
# check to see if it works correctly
my $TEST=md5_pad('foobar');
if( md5_perl_generated(\$TEST) ne
'3858f62230ac3c915f300c664312c63f'){
die('Error: MD5 self-test not successful.');
}
}
########################################################################
=pod
=head1 - Function: LW::md5_pad (INTERNAL)
Params: $data
Return: $padded_data
This function pads the data to be compatible with MD5.
This function is from Digest::Perl::MD5, and bears the following
copyrights:
Copyright 2000 Christian Lackas, Imperia Software Solutions
Copyright 1998-1999 Gisle Aas.
Copyright 1995-1996 Neil Winton.
Copyright 1991-1992 RSA Data Security, Inc.
=cut
sub md5_pad {
my $l = length(my $msg=shift() . chr(128));
$ msg .= "\0" x (($l%64<=56?56:120)-$l%64);
$l=($l-1)*8;
$msg .= pack 'VV',$l & 0xffffffff, ($l >> 16 >> 16);
return $msg;
}
########################################################################
=pod
=head1 - Function: LW::md5_generate (INTERNAL)
Params: none
Return: none
This functions generates and compiles the actual MD5 function. It's
faster to have all the operations inline and in order than to call
functions. Generating the code via below function cuts the final
code savings to about 1/50th, with the penalty of having to compile
it the first time it's used (which takes all of a second or two).
=cut
sub md5_generate {
my $N='abcddabccdabbcda';
my $M='';
$M='&0xffffffff' if((1 << 16) << 16); # mask for 64bit systems
$code=<<EOT;
sub md5_perl_generated {
BEGIN { \$^H |= 1; }; # use integer
my (\$A,\$B,\$C,\$D)=(0x67452301,0xefcdab89,0x98badcfe,0x10325476);
my (\$a,\$b,\$c,\$d,\$t,\$i);
my \$dr=shift;
my \$l=length(\$\$dr);
for my \$L (0 .. ((\$l/64)-1) ) {
my \@D = unpack('V16', substr(\$\$dr, \$L*64,64));
(\$a,\$b,\$c,\$d)=(\$A,\$B,\$C,\$D);
EOT
for($i=0; $i<16; $i++){
my ($a,$b,$c,$d)=split('',substr($N,($i%4)*4,4));
$code.="\$t=((\$$d^(\$$b\&(\$$c^\$$d)))+\$$a+\$D[$M[$i]]+$T[$i])$M;\n";
$code.="\$$a=(((\$t<<$S[$i])|((\$t>>(32-$S[$i]))&((1<<$S[$i])-1)))+\$$b)$M;\n";
}
for(; $i<32; $i++){
my ($a,$b,$c,$d)=split('',substr($N,($i%4)*4,4));
$code.="\$t=((\$$c^(\$$d\&(\$$b^\$$c)))+\$$a+\$D[$M[$i]]+$T[$i])$M;\n";
$code.="\$$a=(((\$t<<$S[$i])|((\$t>>(32-$S[$i]))&((1<<$S[$i])-1)))+\$$b)$M;\n";
}
for(; $i<48; $i++){
my ($a,$b,$c,$d)=split('',substr($N,($i%4)*4,4));
$code.="\$t=((\$$b^\$$c^\$$d)+\$$a+\$D[$M[$i]]+$T[$i])$M;\n";
$code.="\$$a=(((\$t<<$S[$i])|((\$t>>(32-$S[$i]))&((1<<$S[$i])-1)))+\$$b)$M;\n";
}
for(; $i<64; $i++){
my ($a,$b,$c,$d)=split('',substr($N,($i%4)*4,4));
$code.="\$t=((\$$c^(\$$b|(~\$$d)))+\$$a+\$D[$M[$i]]+$T[$i])$M;\n";
$code.="\$$a=(((\$t<<$S[$i])|((\$t>>(32-$S[$i]))&((1<<$S[$i])-1)))+\$$b)$M;\n";
}
$code.=<<EOT;
\$A=\$A+\$a\&0xffffffff; \$B=\$B+\$b\&0xffffffff;
\$C=\$C+\$c\&0xffffffff; \$D=\$D+\$d\&0xffffffff;
} # for
return unpack('H*', pack('V4',\$A,\$B,\$C,\$D)); }
EOT
eval "$code";
}
} # md5 package container
########################################################################
{ # start md4 packaged varbs
my (@S,@T,@M);
my $code='';
=pod
=head1 - Function: LW::md4
Params: $data
Return: $hex_md4_string
This function takes a data scalar, and composes a MD4 hash of it, and
returns it in a hex ascii string. It will use the fastest MD4 function
available.
=cut
sub md4 {
return undef if(!defined $_[0]); # oops, forgot the data
md4_perl(@_);
}
########################################################################
=pod
=head1 - Function: LW::md4_perl
Params: $data
Return: $hex_md4_string
This is the perl implementation of the MD4 function. You should use
the md4() function, which will call this function as a last resort.
You can call this function directly if you want to test the code.
=cut
sub md4_perl {
my $DATA=shift;
$DATA=md5_pad($DATA);
&md4_init() if(!defined $M[0]);
return md4_perl_generated(\$DATA);
}
########################################################################
=pod
=head1 - Function: LW::md4_init (INTERNAL)
Params: none
Return: none
This functions generates and compiles the actual MD4 function. It's
faster to have all the operations inline and in order than to call
functions. Generating the code via below function cuts the final
code savings to about 1/50th, with the penalty of having to compile
it the first time it's used (which takes all of a second or two).
=cut
sub md4_init {
return if(defined $S[0]);
my @t=(3,7,11,19,3,5,9,13,3,9,11,15);
for($i=0; $i<48; $i++){ $S[$i]=$t[(int($i/16)*4)+($i%4)]; }
@M=( 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15,
0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15 );
my $N='abcddabccdabbcda';
my $M='';
$M='&0xffffffff' if((1 << 16) << 16); # mask for 64bit systems
$code=<<EOT;
sub md4_perl_generated {
BEGIN { \$^H |= 1; }; # use integer
my (\$A,\$B,\$C,\$D)=(0x67452301,0xefcdab89,0x98badcfe,0x10325476);
my (\$a,\$b,\$c,\$d,\$t,\$i);
my \$dr=shift;
my \$l=length(\$\$dr);
for my \$L (0 .. ((\$l/64)-1) ) {
my \@D = unpack('V16', substr(\$\$dr, \$L*64,64));
(\$a,\$b,\$c,\$d)=(\$A,\$B,\$C,\$D);
EOT
for($i=0; $i<16; $i++){
my ($a,$b,$c,$d)=split('',substr($N,($i%4)*4,4));
$code.="\$t=((\$$d^(\$$b\&(\$$c^\$$d)))+\$$a+\$D[$M[$i]])$M;\n";
$code.="\$$a=(((\$t<<$S[$i])|((\$t>>(32-$S[$i]))&((1<<$S[$i])-1))))$M;\n";
}
for(; $i<32; $i++){
my ($a,$b,$c,$d)=split('',substr($N,($i%4)*4,4));
$code.="\$t=(( (\$$b&\$$c)|(\$$b&\$$d)|(\$$c&\$$d) )+\$$a+\$D[$M[$i]]+0x5a827999)$M;\n";
$code.="\$$a=(((\$t<<$S[$i])|((\$t>>(32-$S[$i]))&((1<<$S[$i])-1))))$M;\n";
}
for(; $i<48; $i++){
my ($a,$b,$c,$d)=split('',substr($N,($i%4)*4,4));
$code.="\$t=(( \$$b^\$$c^\$$d )+\$$a+\$D[$M[$i]]+0x6ed9eba1)$M;\n";
$code.="\$$a=(((\$t<<$S[$i])|((\$t>>(32-$S[$i]))&((1<<$S[$i])-1))))$M;\n";
}
$code.=<<EOT;
\$A=\$A+\$a\&0xffffffff; \$B=\$B+\$b\&0xffffffff;
\$C=\$C+\$c\&0xffffffff; \$D=\$D+\$d\&0xffffffff;
} # for
return unpack('H*', pack('V4',\$A,\$B,\$C,\$D)); }
EOT
eval "$code";
my $TEST=md5_pad('foobar');
if( md4_perl_generated(\$TEST) ne
'547aefd231dcbaac398625718336f143'){
die('Error: MD4 self-test not successful.');
}
}
} # md4 package container
=pod
=head1 ++ Sub package: multipart
The multipart subpackage contains various utility functions which
support making multipart requests (useful for uploading files).
=cut
########################################################################
=pod
=head1 - Function: LW::multipart_set
Params: \%multi_hash, $param_name, $param_value
Return: nothing
This function sets the named parameter to the given value within the
supplied multipart hash.
=cut
sub multipart_set {
my ($hr,$n,$v)=@_;
return if(!ref($hr)); # error check
return undef if(!defined $n || $n eq '');
$$hr{$n}=$v;
}
########################################################################
=pod
=head1 - Function: LW::multipart_get
Params: \%multi_hash, $param_name
Return: $param_value, undef on error
This function retrieves the named parameter to the given value within the
supplied multipart hash. There is a special case where the named
parameter is actually a file--in which case the resulting value will be
"\0FILE". In general, all special values will be prefixed with a NULL
character. In order to get a file's info, use multipart_getfile().
=cut
sub multipart_get {
my ($hr,$n)=@_;
return undef if(!ref($hr)); # error check
return undef if(!defined $n || $n eq '');
return $$hr{$n};
}
########################################################################
=pod
=head1 - Function: LW::multipart_setfile
Params: \%multi_hash, $param_name, $file_path [, $filename]
Return: undef on error, 1 on success
NOTE: this function does not actually add the contents of $file_path into
the %multi_hash; instead, multipart_write() inserts the content when
generating the final request.
=cut
sub multipart_setfile {
my ($hr,$n,$path)=(shift,shift,shift);
my ($fname)=shift;
return undef if(!ref($hr)); # error check
return undef if(!defined $n || $n eq '');
return undef if(!defined $path);
return undef if(! (-e $path && -f $path) );
if(!defined $fname){
$path=~m/[\\\/]([^\\\/]+)$/;
$fname=$1||"whisker-file";
}
$$hr{$n}="\0FILE";
$$hr{"\0$n"}=[$path,$fname];
return 1;
}
########################################################################
=pod
=head1 - Function: LW::multipart_getfile
Params: \%multi_hash, $file_param_name
Return: $path, $name ($path=undef on error)
LW::multipart_getfile is used to retrieve information for a file
parameter contained in %multi_hash. To use this you would most
likely do:
($path,$fname)=LW::multipart_getfile(\%multi, "param_name");
=cut
sub multipart_getfile {
my ($hr,$n)=@_;
return undef if(!ref($hr)); # error check
return undef if(!defined $n || $n eq '');
return undef if(!defined $$hr{$n} || $$hr{$n} ne "\0FILE");
return @{$$hr{"\0$n"}};
}
########################################################################
=pod
=head1 - Function: LW::multipart_boundary
Params: \%multi_hash [, $new_boundary_name]
Return: $current_boundary_name
LW::multipart_boundary is used to retrieve, and optionally set, the
multipart boundary used for the request.
NOTE: the function does no checking on the supplied boundary, so if
you want things to work make sure it's a legit boundary. Libwhisker
does *not* prefix it with any '---' characters.
=cut
sub multipart_boundary {
my ($hr,$new)=@_;
my $ret;
return undef if(!ref($hr)); # error check
if(!defined $$hr{"\0BOUNDARY"}){
# create boundary on the fly
my $b = uc(LW::utils_randstr(20));
my $b2 = '-' x 32;
$$hr{"\0BOUNDARY"}="$b2$b";
}
$ret=$$hr{"\0BOUNDARY"};
if(defined $new){
$$hr{"\0BOUNDARY"}=$new;
}
return $ret;
}
########################################################################
=pod
=head1 - Function: LW::multipart_write
Params: \%multi_hash, \%hin_request
Return: 1 if successful, undef on error
LW::multipart_write is used to parse and construct the multipart data
contained in %multi_hash, and place it ready to go in the given whisker
hash (%hin) structure, to be sent to the server.
NOTE: file contents are read into the final %hin, so it's possible for
the hash to get *very* large if you have (a) large file(s).
=cut
sub multipart_write {
my ($mp,$hr)=@_;
return undef if(!ref($mp)); # error check
return undef if(!ref($hr)); # error check
if(!defined $$mp{"\0BOUNDARY"}){
# create boundary on the fly
my $b = uc(LW::utils_randstr(20));
my $b2 = '-' x 32;
$$mp{"\0BOUNDARY"}="$b2$b";
}
my $B = $$mp{"\0BOUNDARY"};
my $EOL = $$hr{whisker}->{http_eol}||"\x0d\x0a";
my $keycount=0;
foreach (keys %$mp){
next if(substr($_,0,1) eq "\0");
$keycount++;
if($$mp{$_} eq "\0FILE"){
my ($path,$name)=LW::multipart_getfile($mp,$_);
next if(!defined $path);
$$hr{whisker}->{data}.="$B$EOL";
$$hr{whisker}->{data}.="Content-Disposition: ".
"form-data; name=\"$_\"; ";
$$hr{whisker}->{data}.="filename=\"$name\"$EOL";
$$hr{whisker}->{data}.="Content-Type: ".
"application/octet-stream$EOL";
$$hr{whisker}->{data}.=$EOL;
next if(!open(IN,"<$path"));
binmode(IN); # stupid Windows
while(<IN>){
$$hr{whisker}->{data}.=$_; }
close(IN);
$$hr{whisker}->{data}.=$EOL; # WARNING: is this right?
} else {
$$hr{whisker}->{data}.="$B$EOL";
$$hr{whisker}->{data}.="Content-Disposition: ".
"form-data; name=\"$_\"$EOL";
$$hr{whisker}->{data}.="$EOL$$mp{$_}$EOL";
}
}
if($keycount){
$$hr{whisker}->{data}.="$B--$EOL"; # closing boundary
$$hr{"Content-Length"}=length($$hr{whisker}->{data});
$$hr{"Content-Type"}="multipart/form-data; boundary=$B";
return 1;
} else {
# multipart hash didn't contain params to upload
return undef;
}
}
########################################################################
=pod
=head1 - Function: LW::multipart_read
Params: \%multi_hash, \%hout_response [, $filepath ]
Return: 1 if successful, undef on error
LW::multipart_read will parse the data contents of the supplied
%hout_response hash, by passing the appropriate info to
multipart_read_data(). Please see multipart_read_data() for more
info on parameters and behaviour.
NOTE: this function will return an error if the given %hout_response
Content-Type is not set to "multipart/form-data".
=cut
sub multipart_read {
my ($mp, $hr, $fp)=@_;
return undef if(!(defined $mp && ref($mp)));
return undef if(!(defined $hr && ref($hr)));
my $ctype = LW::utils_find_lowercase_key($hr,'content-type');
return undef if(!defined $ctype);
return undef if($ctype!~m#^multipart/form-data#i);
return LW::multipart_read_data($mp,
\${$hr{'whisker'}->{'data'}},undef,$fp);
}
########################################################################
=pod
=head1 - Function: LW::multipart_read_data
Params: \%multi_hash, \$data, $boundary [, $filepath ]
Return: 1 if successful, undef on error
LW::multipart_read_data parses the contents of the supplied data using
the given boundary and puts the values in the supplied %multi_hash.
Embedded files will *not* be saved unless a $filepath is given, which
should be a directory suitable for writing out temporary files.
NOTE: currently only application/octet-stream is the only supported
file encoding. All other file encodings will not be parsed/saved.
=cut
sub multipart_read_data {
my ($mp, $dr, $bound, $fp)=@_;
return undef if(!(defined $mp && ref($mp)));
return undef if(!(defined $dr && ref($dr)));
# if $bound is undef, then we'll snag what looks to be
# the first boundry from the data.
if(!defined $bound){
if($$dr=~/([-]{5,}[A-Z0-9]+)[\r\n]/i){
$bound=$1;
} else {
# we didn't spot a typical boundary; error
return undef;
}
}
if(defined $fp && !(-d $fp && -w $fp)){
$fp=undef; }
my $line = LW::utils_getline_crlf($dr,0);
return undef if(!defined $line);
return undef if( index($line,$bound) != 0);
my $done=0;
while(!$done){
$done=multipart_read_data_part($mp, $dr, $bound, $fp);
}
return 1;
}
########################################################################
=pod
=head1 - Function: LW::multipart_read_data_part (INTERNAL)
Params: \%multi_hash, \$data, $boundary, $filepath
Return: 0 if more to read, 1 if done
This is an internal function used by multipart_read_data, and should
not be called on it's own. This is the workhorse, and is quite nasty.
=cut
sub multipart_read_data_part {
my ($mp, $dr, $bound, $fp)=@_;
my $dispinfo = LW::utils_getline_crlf($dr);
return 1 if(!defined $dispinfo);
return 1 if(length($dispinfo)==0);
my $lcdisp = lc($dispinfo);
if(index($lcdisp,'content-disposition: form-data;') != 0){
return 1; } # bad disposition
my ($s,$e,$l);
$s=index($lcdisp,'name="',30);
$e=index($lcdisp, '"', $s+6);
return 1 if($s == -1 || $e == -1);
my $NAME=substr($dispinfo,$s+6,$e-$s-6);
$s=index($lcdisp,'filename="',$e);
my $FILENAME=undef;
if($s != -1){
$e=index($lcdisp, '"', $s+10);
return 1 if($e == -1); # puke; malformed filename
$FILENAME=substr($dispinfo,$s+10,$e-$s-10);
$s=rindex($FILENAME,'\\');
$e=rindex($FILENAME,'/');
$s=$e if($e>$s);
$FILENAME=substr($FILENAME,$s+1,length($FILENAME)-$s);
}
my $CTYPE = LW::utils_getline_crlf($dr);
return 1 if(!defined $CTYPE);
$CTYPE = lc($CTYPE);
if(length($CTYPE)>0){
$s=index($CTYPE,'content-type:');
return 1 if($s!=0); # bad ctype line
$CTYPE=substr($CTYPE,13,length($CTYPE)-13);
$CTYPE=~tr/ \t//d;
my $xx=LW::utils_getline_crlf($dr);
return 1 if(!defined $xx);
return 1 if(length($xx)>0);
} else {
$CTYPE='application/octet-stream';
}
my $VALUE='';
while( defined ($l=LW::utils_getline_crlf($dr)) ){
last if(index($l,$bound)==0);
$VALUE.=$l;
$VALUE.="\r\n";
}
substr($VALUE,-2,2)='';
if(!defined $FILENAME){ # read in param
$$mp{$NAME}=$VALUE;
return 0;
} else { # read in file
$$mp{$NAME}="\0FILE";
return 0 if(!defined $fp);
# TODO: funky content types, like application/x-macbinary
if($CTYPE ne 'application/octet-stream'){
return 0; }
my $rfn = lc(LW::utils_randstr(12));
my $fullpath = "$fp$rfn";
$$mp{"\0$NAME"}=[undef,$FILENAME];
return 0 if(!open(OUT,">$fullpath")); # error opening file
binmode(OUT); # stupid Windows
$$mp{"\0$NAME"}=[$fullpath,$FILENAME];
print OUT $VALUE;
close(OUT);
return 0;
} # if !defined $FILENAME
return 0; # um, this should never be reached...
}
########################################################################
=pod
=head1 - Function: LW::multipart_files_list
Params: \%multi_hash
Return: @files
LW::multipart_files_list returns an array of parameter names for all
the files that are contained in %multi_hash.
=cut
sub multipart_files_list {
my ($mp)=shift;
my @ret;
return () if(!(defined $mp && ref($mp)));
while( my ($K, $V)=each(%$mp)){
push(@ret,$K) if($V eq "\0FILE"); }
return @ret;
}
########################################################################
=pod
=head1 - Function: LW::multipart_params_list
Params: \%multi_hash
Return: @params
LW::multipart_files_list returns an array of parameter names for all
the regular parameters (non-file) that are contained in %multi_hash.
=cut
sub multipart_params_list {
my ($mp)=shift;
my @ret;
return () if(!(defined $mp && ref($mp)));
while( my ($K, $V)=each(%$mp)){
push(@ret,$K) if($V ne "\0FILE" &&
substr($K,0,1) ne "\0" );
}
return @ret;
}
########################################################################
=pod
=head1 ++ Sub package: ntlm
The ntlm sub package implements ntlm authentication routines.
=cut
########################################################################
=pod
=head1 - Function: LW::ntlm_new
Params: $username, $password [, $domain, $ntlm_only]
Return: $ntlm_object
Returns a reference to an array (otherwise known as the 'ntlm object')
which contains the various informations specific to a user/pass combo.
If $ntlm_only is set to 1, then only the NTLM hash (and not the LanMan
hash) will be generated. This results in a speed boost, and is typically
fine for using against IIS servers.
The array contains the following items, in order:
username, password, domain, lmhash(password), ntlmhash(password)
=cut
sub ntlm_new {
my ($user,$pass,$domain,$flag)=@_;
$flag||=0;
return undef if(!defined $user);
$pass||=''; $domain||='';
my @a=("$user","$pass","$domain",undef,undef);
my $t;
if($flag==0){
$t=substr($pass,0,14);
$t=~tr/a-z/A-Z/;
$t.= "\0"x(14-length($t));
$a[3]=des_E_P16($t); # LanMan password hash
$a[3].= "\0"x(21-length($a[3]));
}
$t=md4(encode_unicode($pass));
$t=~s/([a-z0-9]{2})/sprintf("%c",hex($1))/ieg;
$t.="\0"x(21-length($t));
$a[4]=$t; # NTLM password hash
&des_cache_reset(); # reset the keys hash
return \@a;
}
########################################################################
=pod
=head1 - Function: LW::ntlm_generate_responses (INTERNAL)
Params: $ntlm_object, $challenge_token
Return: $lanman_hash, $ntlm_hash
Returns the challenge responses to the given tokens, using the password
set in the $ntlm_object.
=cut
sub ntlm_generate_responses {
my ($obj,$chal)=@_;
return (undef,undef) if(!defined $obj || !defined $chal);
return (undef,undef) if(!ref($obj));
my $x='';
$x=des_E_P24($obj->[3], $chal) if(defined $obj->[3]);
return ($x, des_E_P24($obj->[4], $chal));
}
########################################################################
=pod
=head1 - Function: LW::ntlm_decode_challenge (INTERNAL)
Params: $challenge
Return: @challenge_parts
Splits the supplied challenge into the various parts. The returned array
contains elements in the following order:
unicode_domain, ident, packet_type, domain_len, domain_maxlen,
domain_offset, flags, challenge_token, reserved, empty, raw_data
=cut
sub ntlm_decode_challenge {
return undef if(!defined $_[0]);
my $chal=shift;
my @res;
@res=unpack('Z8VvvVVa8a8a8',substr($chal,0,48));
push(@res,substr($chal,48));
unshift(@res,substr($chal,$res[4],$res[2]));
return @res;
}
########################################################################
=pod
=head1 - Function: LW::ntlm_header (INTERNAL)
Params: $string, $header_length, $offset
Return: $header
Constructs an appropriate header for the supplied $string.
=cut
sub ntlm_header {
my ($s,$h,$o)=@_;
my $l=length($s);
return pack('vvV',0,0,$o-$h) if($l==0);
return pack('vvV',$l,$l,$o);
}
########################################################################
=pod
=head1 - Function: LW::ntlm_client
Params: $ntlm_obj [, $server_challenge]
Return: $response
ntlm_client() is responsible for generating the base64-encoded text you
include in the HTTP Authorization header. If you call ntlm_client()
without a $server_challenge, the function will return the initial NTLM
request packet (message packet #1). You send this to the server, and
take the server's response (message packet #2) and pass that as
$server_challenge, causing ntlm_client() to generate the final response
packet (message packet #3).
Note: $server_challenge is expected to be base64 encoded.
=cut
sub ntlm_client {
my ($obj,$p)=@_;
my $resp="NTLMSSP\0";
return undef if(!defined $obj || !ref($obj));
if(defined $p && $p ne ''){ # answer challenge
$p=~tr/ \t\r\n//d;
$p=LW::decode_base64($p);
my @c=ntlm_decode_challenge($p);
$uu=encode_unicode($obj->[0]); # username
$resp.=pack('V',3);
my($hl,$hn)=ntlm_generate_responses($obj,$c[7]); # token
return undef if(!defined $hl || !defined $hn);
my $o=64;
$resp.=ntlm_header($hl,64,$o); # LM hash
$resp.=ntlm_header($hn,64,($o+=length($hl))); # NTLM hash
$resp.=ntlm_header($c[0],64,($o+=length($hn))); # domain
$resp.=ntlm_header($uu,64,($o+=length($c[0]))); # username
$resp.=ntlm_header($uu,64,($o+=length($uu))); # workstation
$resp.=ntlm_header('',64,($o+=length($uu))); # session
$resp.=pack('V',$c[6]);
$resp.=$hl.$hn.$c[0].$uu.$uu;
} else { # initiate challenge
$resp.=pack('VV',1,0x0000b207);
$resp.=ntlm_header($obj->[0],32,32);
$resp.=ntlm_header($obj->[2],32,32+length($obj->[0]));
$resp .= $obj->[0].$obj->[2];
}
return encode_base64($resp,'');
}
########################################################################
=pod
=head1 ++ Sub package: ntlm_des
The ntlm_des sub package implements unchained forward DES in perl, which
is needed by the ntlm auth package to do it's thing. Note that
unchained forward DES is not a symmetrical cipher--it's much more like
using DES as a digest/hash algorithm. Thus there is very little
practical reuse of this code outside of NTLM authentication.
The code below has also been 'tweaked' for the reuse of the set of keys,
which is typical when requiring multiple authentication runs. This leads
to a speed increase when multiple authentications are needed.
The code below is a highly-modified version of Authen::NTLM::DES.pm,
written by Mark.Bush@bushnet.demon.co.uk. Portions of the code below
bear the following copyrights:
Copyright (C) 2001 Mark Bush. <Mark.Bush@bushnet.demon.co.uk>
The code is based on fetchmail code which is Copyright (C) 1997 Eric
S. Raymond.
Fetchmail uses SMB/Netbios code from samba which is Copyright (C)
Andrew Tridgell 1992-1998 with modifications from Jeremy Allison.
All the des_* functions should be considered internal and not called
directly.
=cut
{ # start of DES local container #######################################
my $generated=0;
my $perm1 = [57, 49, 41, 33, 25, 17, 9, 1, 58, 50, 42, 34, 26, 18,
10, 2, 59, 51, 43, 35, 27, 19, 11, 3, 60, 52, 44, 36,
63, 55, 47, 39, 31, 23, 15, 7, 62, 54, 46, 38, 30, 22,
14, 6, 61, 53, 45, 37, 29, 21, 13, 5, 28, 20, 12, 4];
my $perm2 = [14, 17, 11, 24, 1, 5, 3, 28, 15, 6, 21, 10,
23, 19, 12, 4, 26, 8, 16, 7, 27, 20, 13, 2,
41, 52, 31, 37, 47, 55, 30, 40, 51, 45, 33, 48,
44, 49, 39, 56, 34, 53, 46, 42, 50, 36, 29, 32];
my $perm3 = [58, 50, 42, 34, 26, 18, 10, 2, 60, 52, 44, 36, 28, 20, 12, 4,
62, 54, 46, 38, 30, 22, 14, 6, 64, 56, 48, 40, 32, 24, 16, 8,
57, 49, 41, 33, 25, 17, 9, 1, 59, 51, 43, 35, 27, 19, 11, 3,
61, 53, 45, 37, 29, 21, 13, 5, 63, 55, 47, 39, 31, 23, 15, 7];
my $perm4 = [32, 1, 2, 3, 4, 5, 4, 5, 6, 7, 8, 9,
8, 9, 10, 11, 12, 13, 12, 13, 14, 15, 16, 17,
16, 17, 18, 19, 20, 21, 20, 21, 22, 23, 24, 25,
24, 25, 26, 27, 28, 29, 28, 29, 30, 31, 32, 1];
my $perm5 = [16, 7, 20, 21, 29, 12, 28, 17, 1, 15, 23, 26, 5, 18, 31, 10,
2, 8, 24, 14, 32, 27, 3, 9, 19, 13, 30, 6, 22, 11, 4, 25];
my $perm6 = [40, 8, 48, 16, 56, 24, 64, 32, 39, 7, 47, 15, 55, 23, 63, 31,
38, 6, 46, 14, 54, 22, 62, 30, 37, 5, 45, 13, 53, 21, 61, 29,
36, 4, 44, 12, 52, 20, 60, 28, 35, 3, 43, 11, 51, 19, 59, 27,
34, 2, 42, 10, 50, 18, 58, 26, 33, 1, 41, 9, 49, 17, 57, 25];
my $sc = [1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1];
sub des_E_P16 {
my ($p14) = @_;
my $sp8 = [0x4b, 0x47, 0x53, 0x21, 0x40, 0x23, 0x24, 0x25];
my $p7 = substr($p14, 0, 7);
my $p16 = des_smbhash($sp8, $p7);
$p7 = substr($p14, 7, 7);
$p16 .= des_smbhash($sp8, $p7);
return $p16;
}
sub des_E_P24 {
my ($p21, $c8_str) = @_;
my @c8 = map {ord($_)} split(//, $c8_str);
my $p24 = des_smbhash(\@c8, substr($p21, 0, 7));
$p24 .= des_smbhash(\@c8, substr($p21, 7, 7));
$p24 .= des_smbhash(\@c8, substr($p21, 14, 7));
}
sub des_permute {
my ($i,$out, $in, $p, $n) = (0,@_);
foreach $i (0..($n-1)){
$out->[$i] = $in->[$p->[$i]-1]; }
}
sub des_lshift {
my ($c, $d, $count)=@_;
my (@outc, @outd, $i, $x);
while($count--){
push @$c, shift @$c;
push @$d, shift @$d;
}
}
my %dohash_cache; # cache for key data; saves some cycles
my %key_cache; # another cache for key data
sub des_cache_reset {
%dohash_cache=();
%key_cache=();
}
sub des_dohash
{
my ($out, $in, $key) = @_;
my ($i, $j, $k, @pk1, @c, @d, @cd,
@ki, @pd1, @l, @r, @rl);
# if(!defined $dohash_cache{$skey}){
&des_permute(\@pk1, $key, $perm1, 56);
for($i=0;$i<28;$i++) {
$c[$i] = $pk1[$i];
$d[$i] = $pk1[$i+28];
}
for($i=0;$i<16;$i++){
my @array;
&des_lshift(\@c,\@d,$sc->[$i]);
@cd = (@c, @d);
&des_permute(\@array, \@cd, $perm2, 48);
$ki[$i] = \@array;
# $dohash_cache{$skey}->[$i]=\@array;
}
# } else {
# for($i=0;$i<16;$i++){
# $ki[$i]=$dohash_cache{$skey}->[$i];}
# }
des_dohash2($in,\@l,\@r,\@ki);
@rl = (@r, @l);
&des_permute($out, \@rl, $perm6, 64);
}
sub des_str_to_key{
my ($str) = @_;
my ($i,@key,$out);
unshift(@str,ord($_))while($_=chop($str));
$key[0] = $str[0]>>1;
$key[1] = (($str[0]&0x01)<<6) | ($str[1]>>2);
$key[2] = (($str[1]&0x03)<<5) | ($str[2]>>3);
$key[3] = (($str[2]&0x07)<<4) | ($str[3]>>4);
$key[4] = (($str[3]&0x0f)<<3) | ($str[4]>>5);
$key[5] = (($str[4]&0x1f)<<2) | ($str[5]>>6);
$key[6] = (($str[5]&0x3f)<<1) | ($str[6]>>7);
$key[7] = $str[6]&0x7f;
foreach $i (0..7) {
$key[$i] = 0xff&($key[$i]<<1); }
@{$key_cache{$str}}=@key;
return \@key;
}
sub des_smbhash
{
# use faster binary helper
goto &LW::bin::des_smbhash if(defined $LW::available{'lw::bin'});
my ($in, $key) = @_;
my $key2;
&des_generate if(!$generated);
if(defined $key_cache{$key}){ $key2=$key_cache{$key};
} else { $key2=&des_str_to_key($key); }
my ($i, $div, $mod, @in, @outb, @inb, @keyb, @out);
foreach $i (0..63){
$div = int($i/8); $mod = $i%8;
$inb[$i] = ($in->[$div] & (1<<(7-($mod))))? 1: 0;
$keyb[$i] = ($key2->[$div] & (1<<(7-($mod))))? 1: 0;
$outb[$i] = 0;
}
&des_dohash(\@outb, \@inb, \@keyb);
foreach $i (0..7){ $out[$i] = 0; }
foreach $i (0..63){
$out[int($i/8)] |= (1<<(7-($i%8))) if ($outb[$i]); }
my $out = pack("C8", @out);
return $out;
}
sub des_generate { # really scary dragons here....this code is optimized
# for speed, and not readability
my ($i,$j);
my $code=<<EOT;
{ my \$sbox = [[
[14,4,13,1,2,15,11,8,3,10,6,12,5,9,0,7],[0,15,7,4,14,2,13,1,10,6,12,11,9,5,3,8],
[4,1,14,8,13,6,2,11,15,12,9,7,3,10,5,0],[15,12,8,2,4,9,1,7,5,11,3,14,10,0,6,13]
],[
[15,1,8,14,6,11,3,4,9,7,2,13,12,0,5,10],[3,13,4,7,15,2,8,14,12,0,1,10,6,9,11,5],
[0,14,7,11,10,4,13,1,5,8,12,6,9,3,2,15],[13,8,10,1,3,15,4,2,11,6,7,12,0,5,14,9]
],[
[10,0,9,14,6,3,15,5,1,13,12,7,11,4,2,8],[13,7,0,9,3,4,6,10,2,8,5,14,12,11,15,1],
[13,6,4,9,8,15,3,0,11,1,2,12,5,10,14,7],[1,10,13,0,6,9,8,7,4,15,14,3,11,5,2,12]
],[
[7,13,14,3,0,6,9,10,1,2,8,5,11,12,4,15],[13,8,11,5,6,15,0,3,4,7,2,12,1,10,14,9],
[10,6,9,0,12,11,7,13,15,1,3,14,5,2,8,4],[3,15,0,6,10,1,13,8,9,4,5,11,12,7,2,14]
],[
[2,12,4,1,7,10,11,6,8,5,3,15,13,0,14,9],[14,11,2,12,4,7,13,1,5,0,15,10,3,9,8,6],
[4,2,1,11,10,13,7,8,15,9,12,5,6,3,0,14],[11,8,12,7,1,14,2,13,6,15,0,9,10,4,5,3]
],[
[12,1,10,15,9,2,6,8,0,13,3,4,14,7,5,11],[10,15,4,2,7,12,9,5,6,1,13,14,0,11,3,8],
[9,14,15,5,2,8,12,3,7,0,4,10,1,13,11,6],[4,3,2,12,9,5,15,10,11,14,1,7,6,0,8,13]
],[
[4,11,2,14,15,0,8,13,3,12,9,7,5,10,6,1],[13,0,11,7,4,9,1,10,14,3,5,12,2,15,8,6],
[1,4,11,13,12,3,7,14,10,15,6,8,0,5,9,2],[6,11,13,8,1,4,10,7,9,5,0,15,14,2,3,12]
],[
[13,2,8,4,6,15,11,1,10,9,3,14,5,0,12,7],[1,15,13,8,10,3,7,4,12,5,6,11,0,14,9,2],
[7,11,4,1,9,12,14,2,0,6,10,13,15,3,5,8],[2,1,14,7,4,10,8,13,15,12,9,0,3,5,6,11]
]];
EOT
$code.='sub des_dohash2 { my ($in,$l,$r,$ki)=@_; my (@p,$i,$j,$k,$m,$n);';
for($i=0;$i<64;$i++){
$code.="\$p[$i] = \$in->[".($perm3->[$i]-1)."];\n"; }
for($i=0;$i<32;$i++){
$code.="\$l->[$i]=\$p[$i]; \$r->[$i]=\$p[".($i+32)."];\n"; }
$code.='for($i=0;$i<16;$i++){ local (@er,@erk,@b,@cb,@pcb,@r2);';
for($i=0;$i<48;$i++){
$code.="\$erk[$i]=\$r->[".($perm4->[$i]-1)."]^(\$ki->[\$i]->[$i]);\n"; }
for($i=0;$i<8;$i++){
for($j=0;$j<6;$j++){
$code.="\$b[$i][$j]=\$erk[".($i*6+$j)."];\n"; }}
for($i=0;$i<8;$i++){
$code.="\$m=(\$b[$i][0]<<1)|\$b[$i][5];";
$code.="\$n=(\$b[$i][1]<<3)|(\$b[$i][2]<<2)|(\$b[$i][3]<<1)|\$b[$i][4];";
for($j=0;$j<4;$j++){
$code.="\$b[$i][$j]=(\$sbox->[$i][\$m][\$n]&".(1<<(3-$j)).")?1:0;"; }}
for($i=0;$i<8;$i++){
for($j=0;$j<4;$j++){
$code.="\$cb[".($i*4+$j)."]=\$b[$i][$j];\n"; }}
for($i=0;$i<32;$i++){
$code.="\$pcb[$i]=\$cb[".($perm5->[$i]-1)."];\n"; }
for($i=0;$i<32;$i++){
$code.="\$r2[$i]=(\$l->[$i])^\$pcb[$i];\n"; }
for($i=0;$i<32;$i++){
$code.="\$l->[$i]=\$r->[$i]; \$r->[$i]=\$r2[$i];\n"; }
$code.='}}}';
eval "$code";
$generated++;
}
} ##### end of DES container ################################################
=pod
=head1 ++ Sub package: utils
The utils subpackage contains various utility functions which serve
different purposes.
=cut
########################################################################
=pod
=head1 - Function: LW::utils_recperm
Params: $uri, $depth, \@dir_parts, \@valid, \&func, \%track, \%arrays, \&cfunc
Return: nothing
This is a special function which is used to recursively-permutate through
a given directory listing. This is really only used by whisker, in order
to traverse down directories, testing them as it goes. See whisker 2.0 for
exact usage examples.
=cut
# '/', 0, \@dir.split, \@valid, \&func, \%track, \%arrays, \&cfunc
sub utils_recperm {
my ($p, $pp, $pn, $r, $fr, $dr, $ar, $cr)=(shift,shift,@_);
$p=~s#/+#/#g; if($pp >= @$pn) { push @$r, $p if &$cr($$dr{$p});
} else { my $c=$$pn[$pp];
if($c!~/^\@/){ utils_recperm($p.$c.'/',$pp+1,@_) if(&$fr($p.$c.'/'));
} else { $c=~tr/\@//d; if(defined $$ar{$c}){
foreach $d (@{$$ar{$c}}){
if(&$fr($p.$d.'/')){
utils_recperm($p.$d.'/',$pp+1,@_);}}}}}}
#################################################################
=pod
=head1 - Function: LW::utils_array_shuffle
Params: @array
Return: nothing
This function will randomize the order of the elements in the given array.
=cut
sub utils_array_shuffle { # fisher yates shuffle....w00p!
my $array=shift; my $i;
for ($i = @$array; --$i;){
my $j = int rand ($i+1);
next if $i==$j;
@$array[$i,$j]=@$array[$j,$i];
}} # end array_shuffle, from Perl Cookbook (rock!)
#################################################################
=pod
=head1 - Function: LW::utils_randstr
Params: [ $size, $chars ]
Return: $random_string
This function generates a random string between 10 and 20 characters
long, or of $size if specified. If $chars is specified, then the
random function picks characters from the supplied string. For example,
to have a random string of 10 characters, composed of only the characters
'abcdef', then you would run:
LW::utils_randstr(10,'abcdef');
The default character string is alphanumeric.
=cut
sub utils_randstr {
my $str;
my $drift=shift||((rand() * 10) % 10)+10;
# 'a'..'z' doesn't seem to work on string assignment :(
my $CHARS = shift || 'abcdefghijklmnopqrstuvwxyz' .
'ABCDEFGHIJKLMNOPQRSTUVWXYZ' .
'0123456789';
my $L = length($CHARS);
for(1..$drift){
$str .= substr($CHARS,((rand() * $L) % $L),1);
}
return $str;}
#################################################################
=pod
=head1 - Function: LW::utils_get_dir
Params: $uri
Return: $uri_directory
Will take a URI and return the directory base of it, i.e. /rfp/page.php
will return /rfp/.
=cut
sub utils_get_dir {
my ($w,$URL)=(0,shift);
return undef if(!defined $URL);
$URL=substr($URL,0,$w) if( ($w=index($URL,'#')) >= 0);
$URL=substr($URL,0,$w) if( ($w=index($URL,'?')) >= 0);
if( ($w=rindex($URL,'/')) >= 0){
$URL = substr($URL,0,$w+1);
}
return $URL;
}
#################################################################
=pod
=head1 - Function: LW::utils_port_open
Params: $host, $port
Return: $result
Quick function to attempt to make a connection to the given host and
port. If a connection was successfully made, function will return true
(1). Otherwise it returns false (0).
Note: this uses standard TCP connections, thus is not recommended for use
in port-scanning type applications. Extremely slow.
=cut
sub utils_port_open { # this should be platform-safe
my ($target,$port)=@_;
return 0 if(!defined $target || !defined $port);
if(!(socket(S,PF_INET,SOCK_STREAM,0))){ return 0;}
if(connect(S,sockaddr_in($port,inet_aton($target)))){
close(S); return 1;
} else { return 0;}}
#################################################################
=pod
=head1 - Function: LW::utils_split_uri
Params: $uri_string [, \%hin_request]
Return: @uri_parts
Return an array of the following values, in order: uri, protocol, host,
port, params, frag, user, password. Values not defined are given an undef
value. If a %hin_request hash is passed in, then utils_split_uri() will
also set the appropriate values in the hash. While it attempts to do
RFC-compliant URI parsing, it still caters to HTTP[S] only.
Note: utils_split_uri() will only set the %hin_request if the protocol
is HTTP or HTTPS!
=cut
sub utils_split_uri {
my ($uri,$w)=(shift,'',0);
my ($hr)=shift;
my @res=(undef,'http',undef,0,undef,undef,undef,undef);
return undef if(!defined $uri);
# remove fragments
($uri,$res[5])=split('#',$uri,2) if(index($uri,'#',0) >=0);
# get scheme and net_loc
my $net_loc = undef;
if($uri=~s/^([-+.a-z0-9A-Z]+)://){
$res[1]=lc($1);
if(substr($uri,0,2) eq '//'){
$w=index($uri,'/',2);
if($w >= 0){
$net_loc=substr($uri,2,$w-2);
$uri=substr($uri,$w,length($uri)-$w);
} else {
($net_loc=$uri)=~tr#/##d;
$uri='/';
}
}
}
# parse net_loc info
if(defined $net_loc){
if(index($net_loc,'@',0) >=0){
($res[6],$net_loc)=split('@',$net_loc,2);
if(index($res[6],':',0) >=0){
($res[6],$res[7])=split(':',$res[6],2);
}
}
$res[3]=$1 if($net_loc=~s/:([0-9]+)$//);
$res[2]=$net_loc;
}
# remove query info
($uri,$res[4])=split('\?',$uri,2) if(index($uri,'?',0) >=0);
# whatever is left over is the uri
$res[0]=$uri;
if($res[3]==0 && defined $res[1]){
$res[3]=80 if($res[1] eq 'http');
$res[3]=443 if($res[1] eq 'https');
}
return @res if($res[3]==0);
# setup whisker hash
if(defined $hr && ref($hr)){
# these must always exist
$$hr{whisker}->{uri}=$res[0] if(defined $res[0]);
$$hr{whisker}->{ssl}=1 if($res[1] eq 'https');
$$hr{whisker}->{host}=$res[2] if(defined $res[2]);
$$hr{whisker}->{port}=$res[3] ;
# set/delete parameter attributes
if(defined $res[4]){
$$hr{whisker}->{uri_param}=$res[4];
} else { delete $$hr{whisker}->{uri_param}; }
if(defined $res[6]){
$$hr{whisker}->{uri_user}=$res[6];
} else { delete $$hr{whisker}->{uri_user}; }
if(defined $res[7]){
$$hr{whisker}->{uri_password}=$res[7];
} else { delete $$hr{whisker}->{uri_password}; }
}
return @res;
}
#################################################################
=pod
=head1 - Function: LW::utils_lowercase_headers
Params: \%hash
Return: nothing
Will lowercase all the header names (but not values) of the given hash.
=cut
sub utils_lowercase_headers {
goto &utils_lowercase_hashkeys;
}
#################################################################
=pod
=head1 - Function: LW::utils_lowercase_hashkeys
Params: \%hash
Return: nothing
Will lowercase all the header names (but not values) of the given hash.
=cut
sub utils_lowercase_hashkeys {
my $href=shift;
return if(!(defined $href && ref($href)));
while( my ($key,$val)=each %$href ){
delete $$href{$key};
$$href{lc($key)}=$val;
}
}
#################################################################
=pod
=head1 - Function: LW::utils_find_lowercase_key
Params: \%hash, $key
Return: $value, undef on error or not exist
Searches the given hash for the $key (regardless of case), and
returns the value.
=cut
sub utils_find_lowercase_key {
my ($href,$key)=(shift,lc(shift));
return undef if(!(defined $href && ref($href)));
return undef if(!defined $key);
while( my ($k,$v)=each %$href ){
return $v if(lc($k) eq $key);
}
return undef;
}
#################################################################
=pod
=head1 - Function: LW::utils_join_uri
Params: @vals
Return: $url
Takes the @vals array output from utils_split_uri, and returns a single
scalar/string with them joined again, in the form of:
protocol://user:password@host:port/uri?params#frag
=cut
sub utils_join_uri {
my @V=@_;
my $URL;
$URL.=$V[1].':' if defined $V[1];
if(defined $V[2]){
$URL.='//';
if(defined $V[6]){
$URL.=$V[6];
$URL.=':'.$V[7] if defined $V[7];
$URL.='@';
}
$URL.=$V[2];
}
if($V[3]>0){
my $no = 0;
$no++ if($V[3]==80 && defined $V[1] && $V[1] eq 'http');
$no++ if($V[3]==443 && defined $V[1] && $V[1] eq 'https');
$URL .= ':'.$V[3] if(!$no);
}
$URL.=$V[0];
$URL .= '?'.$V[4] if defined $V[4];
$URL .= '#'.$V[5] if defined $V[5];
return $URL;
}
#################################################################
=pod
=head1 - Function: LW::utils_getline
Params: \$data [, $resetpos ]
Return: $line (undef if no more data)
Fetches the next \n terminated line from the given data. Use
the optional $resetpos to reset the internal position pointer.
Does *NOT* return trialing \n.
=cut
{ $POS=0;
sub utils_getline {
my ($dr, $rp)=@_;
return undef if(!(defined $dr && ref($dr)));
$POS=$rp if(defined $rp);
my $where=index($$dr,"\n",$POS);
return undef if($where==-1);
my $str=substr($$dr,$POS,$where-$POS);
$POS=$where+1;
return $str;
}}
#################################################################
=pod
=head1 - Function: LW::utils_getline_crlf
Params: \$data [, $resetpos ]
Return: $line (undef if no more data)
Fetches the next \r\n terminated line from the given data. Use
the optional $resetpos to reset the internal position pointer.
Does *NOT* return trialing \r\n.
=cut
{ $POS=0;
sub utils_getline_crlf {
my ($dr, $rp)=@_;
return undef if(!(defined $dr && ref($dr)));
$POS=$rp if(defined $rp);
my $tpos=$POS;
while(1){
my $where=index($$dr,"\n",$tpos);
return undef if($where==-1);
if(substr($$dr,$where-1,1) eq "\r"){
my $str=substr($$dr,$POS,$where-$POS-1);
$POS=$where+1;
return $str;
} else {
$tpos=$where+1;
}
}
}}
#################################################################
=pod
=head1 - Function: LW::utils_absolute_uri
Params: $uri, $base_uri [, $normalize_flag ]
Return: $absolute_$url
Double checks that the given $uri is in absolute form (that is,
"http://host/file"), and if not (it's in the form "/file"), then
it will append the given $base_uri to make it absolute. This
provides a compatibility similar to that found in the URI
subpackage.
If $normalize_flag is set to 1, then the output will be passed
through utils_normalize_uri before being returned.
=cut
sub utils_absolute_uri {
my ($uri, $buri, $norm)=@_;
return undef if(!defined $uri || !defined $buri);
return $uri if($uri=~m#^[a-zA-Z]+://#);
if(substr($uri,0,1) eq '/'){
if($buri=~m#^[a-zA-Z]+://#){
my @p=utils_split_uri($buri);
$buri="$p[1]://$p[2]";
$buri.=":$p[3]" if($p[3]!=80);
$buri.='/';
} else { # ah suck, base URI isn't absolute...
return $uri;
}
} else {
$buri=~s/[?#].*$//; # remove params and frags
$buri.='/' if($buri=~m#^[a-z]+://[^/]+$#i);
$buri=~s#/[^/]*$#/#;
}
return utils_normalize_uri("$buri$uri")
if(defined $norm && $norm > 0);
return $buri.$uri;
}
#################################################################
=pod
=head1 - Function: LW::utils_normalize_uri
Params: $uri [, $fix_windows_slashes ]
Return: $normalized_uri
Takes the given $uri and does any /./ and /../ dereferencing in
order to come up with the correct absolute URL. If the $fix_
windows_slashes parameter is set to 1, all \ (back slashes) will
be converted to / (forward slashes).
=cut
sub utils_normalize_uri {
my ($host,$uri, $win)=('',@_);
$uri=~tr#\\#/# if(defined $win && $win>0);
if($uri=~s#^([-+.a-z0-9A-Z]+:)##){
return undef if($1 ne 'http:' && $1 ne 'https:');
$host=$1;
return undef unless($uri=~s#^(//[^/]+)##);
$host.=$1; }
return "$host/" if($uri eq '' || $uri eq '/');
# fast path check
return "$host$uri" if(index($uri,'/.')<0);
# parse order/steps as defined in RFC 1808
1 while($uri=~s#/\./#/# || $uri=~s#//#/#);
$uri=~s#/\.$#/#;
1 while($uri=~s#[^/]+/\.\./##);
1 while($uri=~s#^/\.\./#/#);
$uri=~s#[^/]*/\.\.$##;
$uri||='/';
return $host.$uri;
}
#################################################################
=pod
=head1 - Function: LW::utils_save_page
Params: $file, \%response
Return: 0 on success, 1 on error
Saves the data portion of the given whisker %response hash to the
indicated file. Can technically save the data portion of a
%request hash too. A file is not written if there is no data.
Note: LW does not do any special file checking; files are opened
in overwrite mode.
=cut
sub utils_save_page {
my ($file, $hr)=@_;
return 1 if(!ref($hr) || ref($file));
return 0 if(!defined $$hr{'whisker'} ||
!defined $$hr{'whisker'}->{'data'});
open(OUT,">$file") || return 1;
print OUT $$hr{'whisker'}->{'data'};
close(OUT);
return 0;
}
#################################################################
=pod
=head1 - Function: LW::utils_getopts
Params: $opt_str, \%opt_results
Return: 0 on success, 1 on error
This function is a general implementation of GetOpts::Std. It will
parse @ARGV, looking for the options specified in $opt_str, and will
put the results in %opt_results. Behavior/parameter values are
similar to GetOpts::Std's getopts().
Note: this function does *not* support long options (--option),
option grouping (-opq), or options with immediate values (-ovalue).
If an option is indicated as having a value, it will take the next
argument regardless.
=cut
sub utils_getopts {
my ($str,$ref)=@_;
my (%O,$l);
my @left;
return 1 if($str=~tr/-:a-zA-Z0-9//c);
while($str=~m/([a-z0-9]:{0,1})/ig){
$l=$1;
if($l=~tr/://d){ $O{$l}=1;
} else { $O{$l}=0; }
}
while($l=shift(@ARGV)){
push(@left,$l)&&next if(substr($l,0,1) ne '-');
push(@left,$l)&&next if($l eq '-');
substr($l,0,1)='';
if(length($l)!=1){
%$ref=();
return 1; }
if($O{$l}==1){
my $x=shift(@ARGV);
$$ref{$l}=$x;
} else { $$ref{$l}=1; }
}
@ARGV=@left;
return 0;
}
#################################################################
=pod
=head1 - Function: LW::utils_unidecode_uri
Params: $unicode_string
Return: $decoded_string
This function attempts to decode a unicode (UTF-8) string by
converting it into a single-byte-character string. Overlong
characters are converted to their standard characters in place;
non-overlong (aka multi-byte) characters are substituted with the
0xff; invalid encoding characters are left as-is.
Note: this function is useful for dealing with the various unicode
exploits/vulnerabilities found in web servers; it is *not* good for
doing actual UTF-8 parsing, since characters over a single byte are
basically dropped/replaced with a placeholder.
=cut
sub utils_unidecode_uri {
my $str = $_[0];
return $str if($str!~tr/!-~//c); # fastpath
my ($lead,$count,$idx);
my $out='';
my $len = length($str);
my ($ptr,$no,$nu)=(0,0,0);
while($ptr < $len){
my $c=substr($str,$ptr,1);
if( ord($c) >= 0xc0 && ord($c) <= 0xfd){
$count=0;
$c=ord($c)<<1;
while( ($c & 0x80) == 0x80){
$c<<=1;
last if($count++ ==4);
}
$c = ($c & 0xff);
for( $idx=1; $idx<$count; $idx++){
my $o=ord(substr($str,$ptr+$idx,1));
$no=1 if($o != 0x80);
$nu=1 if($o <0x80 || $o >0xbf);
}
my $o=ord(substr($str,$ptr+$idx,1));
$nu=1 if( $o < 0x80 || $o > 0xbf);
if($nu){
$out.=substr($str,$ptr++,1);
} else {
if($no){
$out.="\xff"; # generic replacement char
} else {
my $prior=ord(substr($str,$ptr+$count-1,1))<<6;
$out.= pack("C", (ord(substr($str,$ptr+$count,1) )&0x7f)+$prior);
}
$ptr += $count+1;
}
$no=$nu=0;
} else {
$out.=$c;
$ptr++;
}
}
return $out;
}
#################################################################
=pod
=head1 - Function: LW::utils_text_wrapper
Params: $long_text_string [, $crlf, $width ]
Return: $formatted_test_string
This is a simple function used to format a long line of text for
display on a typical limited-character screen, such as a unix
shell console.
$crlf defaults to "\n", and $width defaults to 76.
=cut
sub utils_text_wrapper {
my ($out,$w,$str,$crlf,$width)=('',0,@_);
$crlf||="\n"; $width||=76;
$str.=$crlf if($str!~/$crlf$/);
return $str if(length($str)<=$width);
while(length($str)>$width){
my $w1=rindex($str,' ',$width);
my $w2=rindex($str,"\t",$width);
if($w1>$w2){ $w=$w1; } else { $w=$w2; }
if($w==-1){ $w=$width;
} else { substr($str,$w,1)=''; }
$out.=substr($str,0,$w,'');
$out.=$crlf;
}
return $out.$str;
}
#################################################################
1;